diff options
34 files changed, 934 insertions, 459 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim index 69269be84..fe601759d 100755 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -57,7 +57,7 @@ type nkStrLit, # a string literal "" nkRStrLit, # a raw string literal r"" nkTripleStrLit, # a triple string literal """ - nkMetaNode, # difficult to explan; represents itself + nkMetaNode, # difficult to explain; represents itself # (used for macros) nkNilLit, # the nil literal # end of atoms @@ -217,7 +217,7 @@ type sfDiscriminant, # field is a discriminant in a record/object sfDeprecated, # symbol is deprecated sfInClosure, # variable is accessed by a closure - sfTypeCheck, # wether macro parameters should be type checked + sfThread, # proc will run as a thread sfCompileTime, # proc can be evaluated at compile time sfThreadVar, # variable is a thread variable sfMerge, # proc can be merged with itself @@ -274,7 +274,8 @@ type tfFinal, # is the object final? tfAcyclic, # type is acyclic (for GC optimization) tfEnumHasHoles, # enum cannot be mapped into a range - tfShallow # type can be shallow copied on assignment + tfShallow, # type can be shallow copied on assignment + tfThread # proc type is marked as ``thread`` TTypeFlags* = set[TTypeFlag] @@ -310,7 +311,7 @@ type TMagic* = enum # symbols that require compiler magic: mNone, mDefined, mDefinedInScope, mLow, mHigh, mSizeOf, mIs, - mEcho, mCreateThread, mShallowCopy, + mEcho, mShallowCopy, mUnaryLt, mSucc, mPred, mInc, mDec, mOrd, mNew, mNewFinalize, mNewSeq, mLengthOpenArray, mLengthStr, mLengthArray, mLengthSeq, mIncl, mExcl, mCard, mChr, mGCref, diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 88284e9fb..d9ad0573a 100755 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -1452,7 +1452,6 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) = of mIncl, mExcl, mCard, mLtSet, mLeSet, mEqSet, mMulSet, mPlusSet, mMinusSet, mInSet: genSetOp(p, e, d, op) - of mCreateThread: genCall(p, e, d) of mNewString, mNewStringOfCap, mCopyStr, 
mCopyStrLast, mExit: var opr = e.sons[0].sym if lfNoDecl notin opr.loc.flags: diff --git a/compiler/ccgthreadvars.nim b/compiler/ccgthreadvars.nim index e92e955c5..7ef084ba8 100644 --- a/compiler/ccgthreadvars.nim +++ b/compiler/ccgthreadvars.nim @@ -10,24 +10,23 @@ ## Thread var support for crappy architectures that lack native support for ## thread local storage. +proc emulatedThreadVars(): bool {.inline.} = + result = optThreads in gGlobalOptions + # NOW: Use the work-around everywhere, because it should be faster anyway. + #platform.OS[targetOS].props.contains(ospLacksThreadVars) + proc AccessThreadLocalVar(p: BProc, s: PSym) = - if optThreads in gGlobalOptions: - if platform.OS[targetOS].props.contains(ospLacksThreadVars): - if not p.ThreadVarAccessed: - p.ThreadVarAccessed = true - p.module.usesThreadVars = true - appf(p.s[cpsLocals], "NimThreadVars* NimTV;$n") - appcg(p, cpsInit, "NimTV=(NimThreadVars*)#GetThreadLocalVars();$n") + if emulatedThreadVars() and not p.ThreadVarAccessed: + p.ThreadVarAccessed = true + p.module.usesThreadVars = true + appf(p.s[cpsLocals], "NimThreadVars* NimTV;$n") + appcg(p, cpsInit, "NimTV=(NimThreadVars*)#GetThreadLocalVars();$n") var nimtv: PRope # nimrod thread vars nimtvDeps: seq[PType] = @[] nimtvDeclared = initIntSet() -proc emulatedThreadVars(): bool {.inline.} = - result = optThreads in gGlobalOptions and - platform.OS[targetOS].props.contains(ospLacksThreadVars) - proc declareThreadVar(m: BModule, s: PSym, isExtern: bool) = if emulatedThreadVars(): # we gather all thread locals var into a struct; we need to allocate diff --git a/compiler/msgs.nim b/compiler/msgs.nim index ed88ff551..f6ec4729a 100755 --- a/compiler/msgs.nim +++ b/compiler/msgs.nim @@ -94,7 +94,7 @@ type warnSmallLshouldNotBeUsed, warnUnknownMagic, warnRedefinitionOfLabel, warnUnknownSubstitutionX, warnLanguageXNotSupported, warnCommentXIgnored, warnXisPassedToProcVar, warnDerefDeprecated, warnAnalysisLoophole, - warnDifferentHeaps, + warnDifferentHeaps, 
warnWriteToForeignHeap, warnUser, hintSuccess, hintSuccessX, hintLineTooLong, hintXDeclaredButNotUsed, hintConvToBaseNotNeeded, @@ -328,7 +328,8 @@ const warnXisPassedToProcVar: "\'$1\' is passed to a procvar; deprecated [XisPassedToProcVar]", warnDerefDeprecated: "p^ is deprecated; use p[] instead [DerefDeprecated]", warnAnalysisLoophole: "thread analysis incomplete due to unkown call '$1' [AnalysisLoophole]", - warnDifferentHeaps: "possible inconsistency of thread local heaps", + warnDifferentHeaps: "possible inconsistency of thread local heaps [DifferentHeaps]", + warnWriteToForeignHeap: "write to foreign heap [WriteToForeignHeap]", warnUser: "$1 [User]", hintSuccess: "operation successful [Success]", hintSuccessX: "operation successful ($1 lines compiled; $2 sec total) [SuccessX]", @@ -345,13 +346,13 @@ const hintPath: "added path: '$1' [Path]", hintUser: "$1 [User]"] -const - WarningsToStr*: array[0..17, string] = ["CannotOpenFile", "OctalEscape", +const + WarningsToStr*: array[0..18, string] = ["CannotOpenFile", "OctalEscape", "XIsNeverRead", "XmightNotBeenInit", "CannotWriteMO2", "CannotReadMO2", "Deprecated", "SmallLshouldNotBeUsed", "UnknownMagic", "RedefinitionOfLabel", "UnknownSubstitutionX", "LanguageXNotSupported", "CommentXIgnored", "XisPassedToProcVar", "DerefDeprecated", - "AnalysisLoophole", "DifferentHeaps", "User"] + "AnalysisLoophole", "DifferentHeaps", "WriteToForeignHeap", "User"] HintsToStr*: array[0..13, string] = ["Success", "SuccessX", "LineTooLong", "XDeclaredButNotUsed", "ConvToBaseNotNeeded", "ConvFromXtoItselfNotNeeded", diff --git a/compiler/pragmas.nim b/compiler/pragmas.nim index 754a88fda..49505be49 100755 --- a/compiler/pragmas.nim +++ b/compiler/pragmas.nim @@ -22,11 +22,11 @@ const procPragmas* = {FirstCallConv..LastCallConv, wImportc, wExportc, wNodecl, wMagic, wNosideEffect, wSideEffect, wNoreturn, wDynLib, wHeader, wCompilerProc, wPure, wProcVar, wDeprecated, wVarargs, wCompileTime, wMerge, - wBorrow, wExtern, 
wImportCompilerProc} + wBorrow, wExtern, wImportCompilerProc, wThread} converterPragmas* = procPragmas methodPragmas* = procPragmas macroPragmas* = {FirstCallConv..LastCallConv, wImportc, wExportc, wNodecl, - wMagic, wNosideEffect, wCompilerProc, wDeprecated, wTypeCheck, wExtern} + wMagic, wNosideEffect, wCompilerProc, wDeprecated, wExtern} iteratorPragmas* = {FirstCallConv..LastCallConv, wNosideEffect, wSideEffect, wImportc, wExportc, wNodecl, wMagic, wDeprecated, wBorrow, wExtern} stmtPragmas* = {wChecks, wObjChecks, wFieldChecks, wRangechecks, wBoundchecks, @@ -37,7 +37,7 @@ const wInfChecks, wNanChecks, wPragma, wEmit, wUnroll, wLinearScanEnd} lambdaPragmas* = {FirstCallConv..LastCallConv, wImportc, wExportc, wNodecl, wNosideEffect, wSideEffect, wNoreturn, wDynLib, wHeader, wPure, - wDeprecated, wExtern} + wDeprecated, wExtern, wThread} typePragmas* = {wImportc, wExportc, wDeprecated, wMagic, wAcyclic, wNodecl, wPure, wHeader, wCompilerProc, wFinal, wSize, wExtern, wShallow} fieldPragmas* = {wImportc, wExportc, wDeprecated, wExtern} @@ -45,7 +45,8 @@ const wMagic, wHeader, wDeprecated, wCompilerProc, wDynLib, wExtern} constPragmas* = {wImportc, wExportc, wHeader, wDeprecated, wMagic, wNodecl, wExtern} - procTypePragmas* = {FirstCallConv..LastCallConv, wVarargs, wNosideEffect} + procTypePragmas* = {FirstCallConv..LastCallConv, wVarargs, wNosideEffect, + wThread} proc pragma*(c: PContext, sym: PSym, n: PNode, validPragmas: TSpecialWords) proc pragmaAsm*(c: PContext, n: PNode): char @@ -125,10 +126,9 @@ proc processMagic(c: PContext, n: PNode, s: PSym) = s.magic = m break if s.magic == mNone: Message(n.info, warnUnknownMagic, v) - if s.magic != mCreateThread: - # magics don't need an implementation, so we - # treat them as imported, instead of modifing a lot of working code: - incl(s.flags, sfImportc) + # magics don't need an implementation, so we + # treat them as imported, instead of modifing a lot of working code: + incl(s.flags, sfImportc) proc 
wordToCallConv(sw: TSpecialWord): TCallingConvention = # this assumes that the order of special words and calling conventions is @@ -500,9 +500,11 @@ proc pragma(c: PContext, sym: PSym, n: PNode, validPragmas: TSpecialWords) = noVal(it) if sym.typ == nil: invalidPragma(it) incl(sym.typ.flags, tfShallow) - of wTypeCheck: + of wThread: noVal(it) - incl(sym.flags, sfTypeCheck) + incl(sym.flags, sfThread) + incl(sym.flags, sfProcVar) + if sym.typ != nil: incl(sym.typ.flags, tfThread) of wHint: Message(it.info, hintUser, expectStrLit(c, it)) of wWarning: Message(it.info, warnUser, expectStrLit(c, it)) of wError: LocalError(it.info, errUser, expectStrLit(c, it)) diff --git a/compiler/sem.nim b/compiler/sem.nim index 16c523bdd..19b3cc9e2 100755 --- a/compiler/sem.nim +++ b/compiler/sem.nim @@ -203,8 +203,9 @@ proc myProcess(context: PPassContext, n: PNode): PNode = result = ast.emptyNode proc checkThreads(c: PContext) = + if not needsGlobalAnalysis(): return for i in 0 .. c.threadEntries.len-1: - semthreads.AnalyseThread(c.threadEntries[i]) + semthreads.AnalyseThreadProc(c.threadEntries[i]) proc myClose(context: PPassContext, n: PNode): PNode = var c = PContext(context) diff --git a/compiler/semdata.nim b/compiler/semdata.nim index 7e40cd0a6..9051e4726 100755 --- a/compiler/semdata.nim +++ b/compiler/semdata.nim @@ -41,7 +41,7 @@ type generics*: PNode # a list of the things to compile; list of # nkExprEqExpr nodes which contain the # generic symbol and the instantiated symbol - threadEntries*: PNode # list of thread entries to check + threadEntries*: TSymSeq # list of thread entries to check lastGenericIdx*: int # used for the generics stack tab*: TSymTab # each module has its own symbol table AmbiguousSymbols*: TIntSet # ids of all ambiguous symbols (cannot @@ -125,7 +125,7 @@ proc newContext(module: PSym, nimfile: string): PContext = append(result.optionStack, newOptionEntry()) result.module = module result.generics = newNode(nkStmtList) - result.threadEntries = 
newNode(nkStmtList) + result.threadEntries = @[] result.converters = @[] result.filename = nimfile result.includedFiles = initIntSet() diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index 3841ae9fb..8f84d665d 100755 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -41,8 +41,8 @@ proc semSymGenericInstantiation(c: PContext, n: PNode, s: PSym): PNode = proc semSym(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = case s.kind of skProc, skMethod, skIterator, skConverter: - if not (sfProcVar in s.flags) and (s.typ.callConv == ccDefault) and - (getModule(s).id != c.module.id): + if sfProcVar notin s.flags and s.typ.callConv == ccDefault and + getModule(s).id != c.module.id: LocalError(n.info, errXCannotBePassedToProcVar, s.name.s) result = symChoice(c, n, s) of skConst: @@ -103,8 +103,8 @@ proc checkConvertible(info: TLineInfo, castDest, src: PType) = d = base(d) s = base(s) if d == nil: - GlobalError(info, errGenerated, `%`(msgKindToString(errIllegalConvFromXtoY), [ - typeToString(src), typeToString(castDest)])) + GlobalError(info, errGenerated, msgKindToString(errIllegalConvFromXtoY) % [ + src.typeToString, castDest.typeToString]) elif d.Kind == tyObject and s.Kind == tyObject: checkConversionBetweenObjects(info, d, s) elif (skipTypes(castDest, abstractVarRange).Kind in IntegralTypes) and @@ -195,15 +195,13 @@ proc semIs(c: PContext, n: PNode): PNode = if sonsLen(n) == 3: n.sons[1] = semExprWithType(c, n.sons[1], {efAllowType}) n.sons[2] = semExprWithType(c, n.sons[2], {efAllowType}) - var a = n.sons[1].typ - var b = n.sons[2].typ - # a and b can be nil in case of an error: - if a != nil and b != nil: - if (b.kind != tyObject) or (a.kind != tyObject): - GlobalError(n.info, errIsExpectsObjectTypes) - while (b != nil) and (b.id != a.id): b = b.sons[0] - if b == nil: - GlobalError(n.info, errXcanNeverBeOfThisSubtype, typeToString(a)) + var a = skipTypes(n.sons[1].typ, abstractPtrs) + var b = skipTypes(n.sons[2].typ, abstractPtrs) + if 
b.kind != tyObject or a.kind != tyObject: + GlobalError(n.info, errIsExpectsObjectTypes) + while b != nil and b.id != a.id: b = b.sons[0] + if b == nil: + GlobalError(n.info, errXcanNeverBeOfThisSubtype, typeToString(a)) n.typ = getSysType(tyBool) else: GlobalError(n.info, errIsExpectsTwoArguments) @@ -338,13 +336,13 @@ proc isAssignable(n: PNode): TAssignableResult = result = arNone case n.kind of nkSym: - if (n.sym.kind in {skVar, skTemp}): result = arLValue + if n.sym.kind in {skVar, skTemp}: result = arLValue of nkDotExpr: if skipTypes(n.sons[0].typ, abstractInst).kind in {tyVar, tyPtr, tyRef}: result = arLValue else: result = isAssignable(n.sons[0]) - if (result == arLValue) and (sfDiscriminant in n.sons[1].sym.flags): + if result == arLValue and sfDiscriminant in n.sons[1].sym.flags: result = arDiscriminant of nkBracketExpr: if skipTypes(n.sons[0].typ, abstractInst).kind in {tyVar, tyPtr, tyRef}: @@ -400,7 +398,7 @@ proc analyseIfAddressTakenInCall(c: PContext, n: PNode) = mAppendSeqElem, mNewSeq, mReset, mShallowCopy} checkMinSonsLen(n, 1) var t = n.sons[0].typ - if (n.sons[0].kind == nkSym) and (n.sons[0].sym.magic in FakeVarParams): + if n.sons[0].kind == nkSym and n.sons[0].sym.magic in FakeVarParams: # BUGFIX: check for L-Value still needs to be done for the arguments! 
for i in countup(1, sonsLen(n) - 1): if i < sonsLen(t) and t.sons[i] != nil and @@ -409,8 +407,8 @@ proc analyseIfAddressTakenInCall(c: PContext, n: PNode) = LocalError(n.sons[i].info, errVarForOutParamNeeded) return for i in countup(1, sonsLen(n) - 1): - if (i < sonsLen(t)) and - (skipTypes(t.sons[i], abstractInst).kind == tyVar): + if i < sonsLen(t) and + skipTypes(t.sons[i], abstractInst).kind == tyVar: n.sons[i] = analyseIfAddressTaken(c, n.sons[i]) proc semDirectCallAnalyseEffects(c: PContext, n: PNode, @@ -466,7 +464,7 @@ proc semIndirectOp(c: PContext, n: PNode, flags: TExprFlags): PNode = result = m.call # we assume that a procedure that calls something indirectly # has side-effects: - if not (tfNoSideEffect in t.flags): incl(c.p.owner.flags, sfSideEffect) + if tfNoSideEffect notin t.flags: incl(c.p.owner.flags, sfSideEffect) else: result = overloadedCallOpr(c, n) # Now that nkSym does not imply an iteration over the proc/iterator space, @@ -845,10 +843,6 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = of mSizeOf: result = semSizeof(c, setMs(n, s)) of mIs: result = semIs(c, setMs(n, s)) of mEcho: result = semEcho(c, setMs(n, s)) - of mCreateThread: - result = semDirectOp(c, n, flags) - if semthreads.needsGlobalAnalysis(): - c.threadEntries.add(result) of mShallowCopy: if sonsLen(n) == 3: # XXX ugh this is really a hack: shallowCopy() can be overloaded only @@ -1103,12 +1097,17 @@ proc semExpr(c: PContext, n: PNode, flags: TExprFlags = {}): PNode = of skTemplate: result = semTemplateExpr(c, n, s) of skType: if n.kind != nkCall: GlobalError(n.info, errXisNotCallable, s.name.s) - # XXX does this check make any sense? 
- result = semConv(c, n, s) + # XXX think about this more (``set`` procs) + if n.len == 2: + result = semConv(c, n, s) + elif Contains(c.AmbiguousSymbols, s.id): + LocalError(n.info, errUseQualifier, s.name.s) + elif s.magic == mNone: result = semDirectOp(c, n, flags) + else: result = semMagic(c, n, s, flags) of skProc, skMethod, skConverter, skIterator: if s.magic == mNone: result = semDirectOp(c, n, flags) else: result = semMagic(c, n, s, flags) - else: + else: #liMessage(n.info, warnUser, renderTree(n)); result = semIndirectOp(c, n, flags) elif n.sons[0].kind == nkSymChoice: diff --git a/compiler/semstmts.nim b/compiler/semstmts.nim index 2e328d730..4a80bb783 100755 --- a/compiler/semstmts.nim +++ b/compiler/semstmts.nim @@ -549,10 +549,12 @@ proc sideEffectsCheck(c: PContext, s: PSym) = if {sfNoSideEffect, sfSideEffect} * s.flags == {sfNoSideEffect, sfSideEffect}: LocalError(s.info, errXhasSideEffects, s.name.s) + elif sfThread in s.flags and semthreads.needsGlobalAnalysis(): + c.threadEntries.add(s) proc addResult(c: PContext, t: PType, info: TLineInfo) = if t != nil: - var s = newSym(skVar, getIdent("result"), getCurrOwner()) + var s = newSym(skVar, getIdent"result", getCurrOwner()) s.info = info s.typ = t incl(s.flags, sfResult) @@ -566,7 +568,7 @@ proc addResultNode(c: PContext, n: PNode) = proc semLambda(c: PContext, n: PNode): PNode = result = n checkSonsLen(n, codePos + 1) - var s = newSym(skProc, getIdent(":anonymous"), getCurrOwner()) + var s = newSym(skProc, getIdent":anonymous", getCurrOwner()) s.info = n.info s.ast = n n.sons[namePos] = newSymNode(s) @@ -594,10 +596,11 @@ proc semLambda(c: PContext, n: PNode): PNode = popProcCon(c) else: LocalError(n.info, errImplOfXexpected, s.name.s) + sideEffectsCheck(c, s) closeScope(c.tab) # close scope for parameters popOwner() result.typ = s.typ - + proc semProcAux(c: PContext, n: PNode, kind: TSymKind, validPragmas: TSpecialWords): PNode = var diff --git a/compiler/semthreads.nim b/compiler/semthreads.nim 
index 3ca52fc30..bff8823b2 100644 --- a/compiler/semthreads.nim +++ b/compiler/semthreads.nim @@ -18,7 +18,7 @@ ## The only crucial operation that can violate the heap invariants is the ## write access. The analysis needs to distinguish between 'unknown', 'mine', ## and 'theirs' memory and pointers. Assignments 'whatever <- unknown' are -## invalid, and so are 'theirs <- mine' but not 'mine <- theirs'. Since +## invalid, and so are 'theirs <- whatever' but not 'mine <- theirs'. Since ## strings and sequences are heap allocated they are affected too: ## ## .. code-block:: nimrod @@ -30,8 +30,9 @@ ## If the type system would distinguish between 'ref' and '!ref' and threads ## could not have '!ref' as input parameters the analysis could simply need to ## reject any write access to a global variable which contains GC'ed data. -## However, '!ref' is not implemented yet and this scheme would be too -## restrictive anyway. +## Thanks to the write barrier of the GC, this is exactly what needs to be +## done! Every write access to a global that contains GC'ed data needs to +## be prevented! Unfortunately '!ref' is not implemented yet... ## ## The assignment target is essential for the algorithm: only ## write access to heap locations and global variables are critical and need @@ -42,7 +43,8 @@ ## ## var x = globalVar # 'x' points to 'theirs' ## while true: -## globalVar = x # OK: 'theirs <- theirs' +## globalVar = x # NOT OK: 'theirs <- theirs' invalid due to +## # write barrier! ## x = "new string" # ugh: 'x is toUnknown'! ## ## --> Solution: toUnknown is never allowed anywhere! 
@@ -106,10 +108,12 @@ proc analyseSym(c: PProcCtx, n: PNode): TThreadOwner = if result != toUndefined: return case v.kind of skVar: + result = toNil if sfGlobal in v.flags: - result = if sfThreadVar in v.flags: toMine else: toTheirs - else: - result = toNil + if sfThreadVar in v.flags: + result = toMine + elif containsTyRef(v.typ): + result = toTheirs of skTemp, skForVar: result = toNil of skConst: result = toMine of skParam: @@ -136,7 +140,8 @@ proc writeAccess(c: PProcCtx, n: PNode, owner: TThreadOwner) = of toNil: c.mapping[v.id] = owner # fine, toNil can be overwritten of toVoid, toUndefined: InternalError(n.info, "writeAccess") - of toTheirs, toMine: + of toTheirs: Message(n.info, warnWriteToForeignHeap) + of toMine: if lastOwner != owner and owner != toNil: Message(n.info, warnDifferentHeaps) else: @@ -145,7 +150,8 @@ proc writeAccess(c: PProcCtx, n: PNode, owner: TThreadOwner) = case lastOwner of toNil: nil # fine, toNil can be overwritten of toVoid, toUndefined: InternalError(n.info, "writeAccess") - of toTheirs, toMine: + of toTheirs: Message(n.info, warnWriteToForeignHeap) + of toMine: if lastOwner != owner and owner != toNil: Message(n.info, warnDifferentHeaps) @@ -171,7 +177,8 @@ proc analyseCall(c: PProcCtx, n: PNode): TThreadOwner = newCtx.mapping[formal.id] = call.args[i-1] pushInfoContext(n.info) result = analyse(newCtx, prc.ast.sons[codePos]) - if prc.ast.sons[codePos].kind == nkEmpty and sfNoSideEffect notin prc.flags: + if prc.ast.sons[codePos].kind == nkEmpty and + {sfNoSideEffect, sfThread} * prc.flags == {}: Message(n.info, warnAnalysisLoophole, renderTree(n)) if prc.typ.sons[0] != nil: if prc.ast.len > resultPos: @@ -228,7 +235,7 @@ proc analyseArgs(c: PProcCtx, n: PNode, start = 1) = proc analyseOp(c: PProcCtx, n: PNode): TThreadOwner = if n[0].kind != nkSym or n[0].sym.kind != skProc: - if tfNoSideEffect notin n[0].typ.flags: + if {tfNoSideEffect, tfThread} * n[0].typ.flags == {}: Message(n.info, warnAnalysisLoophole, renderTree(n)) result 
= toNil else: @@ -335,22 +342,26 @@ proc analyse(c: PProcCtx, n: PNode): TThreadOwner = result = toVoid else: InternalError(n.info, "analysis not implemented for: " & $n.kind) -proc analyseThreadCreationCall(n: PNode) = - # thread proc is second param of ``createThread``: - if n[2].kind != nkSym or n[2].sym.kind != skProc: - Message(n.info, warnAnalysisLoophole, renderTree(n)) - return - var prc = n[2].sym +proc analyseThreadProc*(prc: PSym) = var c = newProcCtx(prc) - var formal = skipTypes(prc.typ, abstractInst).n.sons[1].sym - c.mapping[formal.id] = toTheirs # thread receives foreign data! + var formals = skipTypes(prc.typ, abstractInst).n + for i in 1 .. formals.len-1: + var formal = formals.sons[i].sym + c.mapping[formal.id] = toTheirs # thread receives foreign data! discard analyse(c, prc.ast.sons[codePos]) +when false: + proc analyseThreadCreationCall(n: PNode) = + # thread proc is second param of ``createThread``: + if n[2].kind != nkSym or n[2].sym.kind != skProc: + Message(n.info, warnAnalysisLoophole, renderTree(n)) + return + analyseProc(n[2].sym) + + proc AnalyseThread*(threadCreation: PNode) = + analyseThreadCreationCall(threadCreation) + proc needsGlobalAnalysis*: bool = result = gGlobalOptions * {optThreads, optThreadAnalysis} == {optThreads, optThreadAnalysis} -proc AnalyseThread*(threadCreation: PNode) = - if needsGlobalAnalysis(): - analyseThreadCreationCall(threadCreation) - diff --git a/compiler/sigmatch.nim b/compiler/sigmatch.nim index ac1429e4e..ec689d315 100755 --- a/compiler/sigmatch.nim +++ b/compiler/sigmatch.nim @@ -220,6 +220,9 @@ proc procTypeRel(mapping: var TIdTable, f, a: PType): TTypeRelation = result = isNone if tfNoSideEffect in f.flags and tfNoSideEffect notin a.flags: result = isNone + elif tfThread in f.flags and a.flags * {tfThread, tfNoSideEffect} == {}: + # noSideEffect implies ``tfThread``! 
+ result = isNone else: nil proc typeRel(mapping: var TIdTable, f, a: PType): TTypeRelation = diff --git a/compiler/types.nim b/compiler/types.nim index 8769993f2..01504ec56 100755 --- a/compiler/types.nim +++ b/compiler/types.nim @@ -331,6 +331,13 @@ proc containsGarbageCollectedRef(typ: PType): bool = # that are garbage-collected) result = searchTypeFor(typ, isGBCRef) +proc isTyRef(t: PType): bool = + result = t.kind == tyRef + +proc containsTyRef*(typ: PType): bool = + # returns true if typ contains a 'ref' + result = searchTypeFor(typ, isTyRef) + proc isHiddenPointer(t: PType): bool = result = t.kind in {tyString, tySequence} @@ -484,6 +491,9 @@ proc TypeToString(typ: PType, prefer: TPreferedDesc = preferName): string = if tfNoSideEffect in t.flags: addSep(prag) add(prag, "noSideEffect") + if tfThread in t.flags: + addSep(prag) + add(prag, "thread") if len(prag) != 0: add(result, "{." & prag & ".}") else: result = typeToStr[t.kind] diff --git a/compiler/wordrecg.nim b/compiler/wordrecg.nim index 5335c2ccf..b55f13ae9 100755 --- a/compiler/wordrecg.nim +++ b/compiler/wordrecg.nim @@ -33,7 +33,7 @@ type wWithout, wXor, wYield, wColon, wColonColon, wEquals, wDot, wDotDot, wHat, wStar, wMinus, - wMagic, wTypeCheck, wFinal, wProfiler, wObjChecks, + wMagic, wThread, wFinal, wProfiler, wObjChecks, wImportCompilerProc, wImportc, wExportc, wExtern, wAlign, wNodecl, wPure, wVolatile, wRegister, wSideeffect, wHeader, @@ -80,7 +80,7 @@ const "yield", ":", "::", "=", ".", "..", "^", "*", "-", - "magic", "typecheck", "final", "profiler", "objchecks", + "magic", "thread", "final", "profiler", "objchecks", "importcompilerproc", "importc", "exportc", "extern", "align", "nodecl", "pure", "volatile", "register", "sideeffect", "header", "nosideeffect", "noreturn", "merge", "lib", "dynlib", diff --git a/doc/lib.txt b/doc/lib.txt index d3ad7dd18..25f2d42dd 100755 --- a/doc/lib.txt +++ b/doc/lib.txt @@ -52,6 +52,8 @@ Collections and algorithms * `lists <lists.html>`_ Nimrod linked 
list support. Contains singly and doubly linked lists and circular lists ("rings"). +* `queues <queues.html>`_ + Implementation of a queue. The underlying implementation uses a ``seq``. * `intsets <intsets.html>`_ Efficient implementation of a set of ints as a sparse bit set. diff --git a/install.txt b/install.txt index 9eed69e54..345a6846c 100755 --- a/install.txt +++ b/install.txt @@ -45,9 +45,9 @@ manually. An alternative is to create a symbolic link in ``/usr/bin``:: [sudo] ln -s $your_install_dir/bin/nimrod /usr/bin/nimrod -From version 0.7.10 onwards ``install.sh`` and ``deinstall.sh`` scripts are -provided for distributing the files over the UNIX hierarchy. However, -updating your Nimrod installation is more cumbersome then. +There are also ``install.sh`` and ``deinstall.sh`` scripts for distributing +the files over the UNIX hierarchy. However, updating your Nimrod installation +is more cumbersome then. Installation on the Macintosh diff --git a/lib/pure/collections/queues.nim b/lib/pure/collections/queues.nim new file mode 100644 index 000000000..2130d9949 --- /dev/null +++ b/lib/pure/collections/queues.nim @@ -0,0 +1,89 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Implementation of a queue. The underlying implementation uses a ``seq``. + +import math + +type + TQueue* {.pure, final.}[T] = object ## a queue + data: seq[T] + rd, wr, count, mask: int + +proc initQueue*[T](initialSize=4): TQueue[T] = + ## creates a new queue. `initialSize` needs to be a power of 2. + assert IsPowerOfTwo(initialSize) + result.mask = initialSize-1 + newSeq(result.data, initialSize) + +proc len*[T](q: TQueue[T]): int = + ## returns the number of elements of `q`. + result = q.count + +iterator items*[T](q: TQueue[T]): T = + ## yields every element of `q`. 
+ var i = q.rd + var c = q.count + while c > 0: + dec c + yield q.data[i] + i = (i + 1) and q.mask + +proc add*[T](q: var TQueue[T], item: T) = + ## adds an `item` to the end of the queue `q`. + var cap = q.mask+1 + if q.count >= cap: + var n: seq[T] + newSeq(n, cap*2) + var i = 0 + for x in items(q): + shallowCopy(n[i], x) + inc i + shallowCopy(q.data, n) + q.mask = cap*2 - 1 + q.wr = q.count + q.rd = 0 + inc q.count + q.data[q.wr] = item + q.wr = (q.wr + 1) and q.mask + +proc enqueue*[T](q: var TQueue[T], item: T) = + ## alias for the ``add`` operation. + add(q, item) + +proc dequeue*[T](q: var TQueue[T]): T = + ## removes and returns the first element of the queue `q`. + assert q.count > 0 + dec q.count + result = q.data[q.rd] + q.rd = (q.rd + 1) and q.mask + +proc `$`*[T](q: TQueue[T]): string = + ## turns a queue into its string representation. + result = "[" + for x in items(q): + if result.len > 1: result.add(", ") + result.add($x) + result.add("]") + +when isMainModule: + var q = initQueue[int]() + q.add(123) + q.add(9) + q.add(4) + var first = q.dequeue + q.add(56) + q.add(6) + var second = q.dequeue + q.add(789) + + assert first == 123 + assert second == 9 + assert($q == "[4, 56, 6, 789]") + diff --git a/lib/pure/marshal.nim b/lib/pure/marshal.nim index f96d177ae..354d70a71 100755 --- a/lib/pure/marshal.nim +++ b/lib/pure/marshal.nim @@ -8,7 +8,26 @@ # ## This module contains procs for serialization and deseralization of -## arbitrary Nimrod data structures. The serialization format uses JSON. +## arbitrary Nimrod data structures. The serialization format uses JSON. +## +## **Restriction**: For objects their type is **not** serialized. This means +## essentially that it does not work if the object has some other runtime +## type than its compiletime type: +## +## .. 
code-block:: nimrod +## +## type +## TA = object +## TB = object of TA +## f: int +## +## var +## a: ref TA +## b: ref TB +## +## new(b) +## a = b +## echo($$a[]) # produces "{}", not "{f: 0}" import streams, typeinfo, json, intsets, tables @@ -286,3 +305,15 @@ when isMainModule: echo($$test7) testit(test7) + type + TA = object + TB = object of TA + f: int + + var + a: ref TA + b: ref TB + new(b) + a = b + echo($$a[]) # produces "{}", not "{f: 0}" + diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index 60bef813d..2b7047143 100755 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim @@ -77,11 +77,14 @@ proc startProcess*(command: string, ## If ``env == nil`` the environment is inherited of ## the parent process. `options` are additional flags that may be passed ## to `startProcess`. See the documentation of ``TProcessOption`` for the - ## meaning of these flags. + ## meaning of these flags. You need to `close` the process when done. ## ## Return value: The newly created process object. Nil is never returned, ## but ``EOS`` is raised in case of an error. +proc close*(p: PProcess) {.rtl, extern: "nosp$1".} + ## When the process has finished executing, cleanup related handles + proc suspend*(p: PProcess) {.rtl, extern: "nosp$1".} ## Suspends the process `p`. 
@@ -179,6 +182,7 @@ proc execProcesses*(cmds: openArray[string], err.add("\n") echo(err) result = max(waitForExit(q[r]), result) + if q[r] != nil: close(q[r]) q[r] = startProcessAux(cmds[i], options=options) r = (r + 1) mod n else: @@ -189,15 +193,18 @@ proc execProcesses*(cmds: openArray[string], if not running(q[r]): #echo(outputStream(q[r]).readLine()) result = max(waitForExit(q[r]), result) + if q[r] != nil: close(q[r]) q[r] = startProcessAux(cmds[i], options=options) inc(i) if i > high(cmds): break for i in 0..m-1: + if q[i] != nil: close(q[i]) result = max(waitForExit(q[i]), result) else: for i in 0..high(cmds): var p = startProcessAux(cmds[i], options=options) result = max(waitForExit(p), result) + close(p) proc select*(readfds: var seq[PProcess], timeout = 500): int ## `select` with a sensible Nimrod interface. `timeout` is in miliseconds. @@ -215,6 +222,8 @@ when not defined(useNimRtl): while running(p) or not outp.atEnd(outp): result.add(outp.readLine()) result.add("\n") + outp.close(outp) + close(p) when false: proc deallocCStringArray(a: cstringArray) = @@ -356,6 +365,12 @@ when defined(Windows) and not defined(useNimRtl): result.FProcessHandle = procInfo.hProcess result.id = procInfo.dwProcessID + proc close(p: PProcess) = + discard CloseHandle(p.inputHandle) + discard CloseHandle(p.outputHandle) + discard CloseHandle(p.errorHandle) + discard CloseHandle(p.FProcessHandle) + proc suspend(p: PProcess) = discard SuspendThread(p.FProcessHandle) @@ -523,6 +538,11 @@ elif not defined(useNimRtl): discard close(p_stdin[readIdx]) discard close(p_stdout[writeIdx]) + proc close(p: PProcess) = + discard close(p.inputHandle) + discard close(p.outputHandle) + discard close(p.errorHandle) + proc suspend(p: PProcess) = discard kill(p.id, SIGSTOP) diff --git a/lib/system.nim b/lib/system.nim index 5ece9375e..5c7102664 100755 --- a/lib/system.nim +++ b/lib/system.nim @@ -785,6 +785,10 @@ when hasThreadSupport and not hasSharedHeap: else: {.pragma: rtlThreadVar.} 
+template sysAssert(cond: expr) = + # change this to activate system asserts + nil + include "system/inclrtl" when not defined(ecmascript) and not defined(nimrodVm): @@ -1251,7 +1255,7 @@ proc each*[T](data: var openArray[T], op: proc (x: var T)) = for i in 0..data.len-1: op(data[i]) iterator fields*[T: tuple](x: T): expr {.magic: "Fields", noSideEffect.} - ## iterates over every field of `x`. Warning: This is really transforms + ## iterates over every field of `x`. Warning: This really transforms ## the 'for' and unrolls the loop. The current implementation also has a bug ## that affects symbol binding in the loop body. iterator fields*[S: tuple, T: tuple](x: S, y: T): tuple[a, b: expr] {. @@ -1261,13 +1265,13 @@ iterator fields*[S: tuple, T: tuple](x: S, y: T): tuple[a, b: expr] {. ## The current implementation also has a bug that affects symbol binding ## in the loop body. iterator fieldPairs*[T: tuple](x: T): expr {.magic: "FieldPairs", noSideEffect.} - ## iterates over every field of `x`. Warning: This is really transforms + ## iterates over every field of `x`. Warning: This really transforms ## the 'for' and unrolls the loop. The current implementation also has a bug ## that affects symbol binding in the loop body. iterator fieldPairs*[S: tuple, T: tuple](x: S, y: T): tuple[a, b: expr] {. magic: "FieldPairs", noSideEffect.} ## iterates over every field of `x` and `y`. - ## Warning: This is really transforms the 'for' and unrolls the loop. + ## Warning: This really transforms the 'for' and unrolls the loop. ## The current implementation also has a bug that affects symbol binding ## in the loop body. @@ -1703,10 +1707,10 @@ when not defined(EcmaScript) and not defined(NimrodVM): # ---------------------------------------------------------------------------- - proc atomicInc*(memLoc: var int, x: int): int {.inline.} + proc atomicInc*(memLoc: var int, x: int = 1): int {.inline.} ## atomic increment of `memLoc`. Returns the value after the operation. 
- proc atomicDec*(memLoc: var int, x: int): int {.inline.} + proc atomicDec*(memLoc: var int, x: int = 1): int {.inline.} ## atomic decrement of `memLoc`. Returns the value after the operation. include "system/atomics" @@ -1719,6 +1723,7 @@ when not defined(EcmaScript) and not defined(NimrodVM): context: C_JmpBuf when hasThreadSupport: + include "system/syslocks" include "system/threads" else: initStackBottom() @@ -1739,14 +1744,14 @@ when not defined(EcmaScript) and not defined(NimrodVM): proc reprAny(p: pointer, typ: PNimType): string {.compilerRtl.} proc getDiscriminant(aa: Pointer, n: ptr TNimNode): int = - assert(n.kind == nkCase) + sysAssert(n.kind == nkCase) var d: int var a = cast[TAddress](aa) case n.typ.size of 1: d = ze(cast[ptr int8](a +% n.offset)[]) of 2: d = ze(cast[ptr int16](a +% n.offset)[]) of 4: d = int(cast[ptr int32](a +% n.offset)[]) - else: assert(false) + else: sysAssert(false) return d proc selectBranch(aa: Pointer, n: ptr TNimNode): ptr TNimNode = @@ -1764,6 +1769,8 @@ when not defined(EcmaScript) and not defined(NimrodVM): {.pop.} include "system/sysio" + when hasThreadSupport: + include "system/inboxes" iterator lines*(filename: string): string = ## Iterate over any line in the file named `filename`. 
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim index 3273242d6..8a54e0ddd 100755 --- a/lib/system/alloc.nim +++ b/lib/system/alloc.nim @@ -128,12 +128,12 @@ template bigChunkOverhead(): expr = sizeof(TBigChunk)-sizeof(TAlignType) proc roundup(x, v: int): int {.inline.} = result = (x + (v-1)) and not (v-1) - assert(result >= x) + sysAssert(result >= x) #return ((-x) and (v-1)) +% x -assert(roundup(14, PageSize) == PageSize) -assert(roundup(15, 8) == 16) -assert(roundup(65, 8) == 72) +sysAssert(roundup(14, PageSize) == PageSize) +sysAssert(roundup(15, 8) == 16) +sysAssert(roundup(65, 8) == 72) # ------------- chunk table --------------------------------------------------- # We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk @@ -149,35 +149,35 @@ type acc: int # accumulator next: PLLChunk # next low-level chunk; only needed for dealloc - TAllocator {.final, pure.} = object + TMemRegion {.final, pure.} = object llmem: PLLChunk currMem, maxMem, freeMem: int # memory sizes (allocated from OS) lastSize: int # needed for the case that OS gives us pages linearly freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk] freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access chunkStarts: TIntSet - -proc incCurrMem(a: var TAllocator, bytes: int) {.inline.} = + +proc incCurrMem(a: var TMemRegion, bytes: int) {.inline.} = inc(a.currMem, bytes) -proc decCurrMem(a: var TAllocator, bytes: int) {.inline.} = +proc decCurrMem(a: var TMemRegion, bytes: int) {.inline.} = a.maxMem = max(a.maxMem, a.currMem) dec(a.currMem, bytes) -proc getMaxMem(a: var TAllocator): int = +proc getMaxMem(a: var TMemRegion): int = # Since we update maxPagesCount only when freeing pages, # maxPagesCount may not be up to date. 
Thus we use the # maximum of these both values here: return max(a.currMem, a.maxMem) -proc llAlloc(a: var TAllocator, size: int): pointer = +proc llAlloc(a: var TMemRegion, size: int): pointer = # *low-level* alloc for the memory managers data structures. Deallocation # is done at he end of the allocator's life time. if a.llmem == nil or size > a.llmem.size: # the requested size is ``roundup(size+sizeof(TLLChunk), PageSize)``, but # since we know ``size`` is a (small) constant, we know the requested size # is one page: - assert roundup(size+sizeof(TLLChunk), PageSize) == PageSize + sysAssert roundup(size+sizeof(TLLChunk), PageSize) == PageSize var old = a.llmem # can be nil and is correct with nil a.llmem = cast[PLLChunk](osAllocPages(PageSize)) incCurrMem(a, PageSize) @@ -189,7 +189,7 @@ proc llAlloc(a: var TAllocator, size: int): pointer = inc(a.llmem.acc, size) zeroMem(result, size) -proc llDeallocAll(a: var TAllocator) = +proc llDeallocAll(a: var TMemRegion) = var it = a.llmem while it != nil: # we know each block in the list has the size of 1 page: @@ -204,7 +204,7 @@ proc IntSetGet(t: TIntSet, key: int): PTrunk = it = it.next result = nil -proc IntSetPut(a: var TAllocator, t: var TIntSet, key: int): PTrunk = +proc IntSetPut(a: var TMemRegion, t: var TIntSet, key: int): PTrunk = result = IntSetGet(t, key) if result == nil: result = cast[PTrunk](llAlloc(a, sizeof(result[]))) @@ -220,7 +220,7 @@ proc Contains(s: TIntSet, key: int): bool = else: result = false -proc Incl(a: var TAllocator, s: var TIntSet, key: int) = +proc Incl(a: var TMemRegion, s: var TIntSet, key: int) = var t = IntSetPut(a, s, key shr TrunkShift) var u = key and TrunkMask t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask)) @@ -259,13 +259,13 @@ proc pageIndex(p: pointer): int {.inline.} = proc pageAddr(p: pointer): PChunk {.inline.} = result = cast[PChunk](cast[TAddress](p) and not PageMask) - #assert(Contains(allocator.chunkStarts, pageIndex(result))) + 
#sysAssert(Contains(allocator.chunkStarts, pageIndex(result))) -proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = +proc requestOsChunks(a: var TMemRegion, size: int): PBigChunk = incCurrMem(a, size) inc(a.freeMem, size) result = cast[PBigChunk](osAllocPages(size)) - assert((cast[TAddress](result) and PageMask) == 0) + sysAssert((cast[TAddress](result) and PageMask) == 0) #zeroMem(result, size) result.next = nil result.prev = nil @@ -273,7 +273,7 @@ proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = result.size = size # update next.prevSize: var nxt = cast[TAddress](result) +% size - assert((nxt and PageMask) == 0) + sysAssert((nxt and PageMask) == 0) var next = cast[PChunk](nxt) if pageIndex(next) in a.chunkStarts: #echo("Next already allocated!") @@ -281,7 +281,7 @@ proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = # set result.prevSize: var lastSize = if a.lastSize != 0: a.lastSize else: PageSize var prv = cast[TAddress](result) -% lastSize - assert((nxt and PageMask) == 0) + sysAssert((nxt and PageMask) == 0) var prev = cast[PChunk](prv) if pageIndex(prev) in a.chunkStarts and prev.size == lastSize: #echo("Prev already allocated!") @@ -290,11 +290,11 @@ proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = result.prevSize = 0 # unknown a.lastSize = size # for next request -proc freeOsChunks(a: var TAllocator, p: pointer, size: int) = +proc freeOsChunks(a: var TMemRegion, p: pointer, size: int) = # update next.prevSize: var c = cast[PChunk](p) var nxt = cast[TAddress](p) +% c.size - assert((nxt and PageMask) == 0) + sysAssert((nxt and PageMask) == 0) var next = cast[PChunk](nxt) if pageIndex(next) in a.chunkStarts: next.prevSize = 0 # XXX used @@ -304,7 +304,7 @@ proc freeOsChunks(a: var TAllocator, p: pointer, size: int) = dec(a.freeMem, size) #c_fprintf(c_stdout, "[Alloc] back to OS: %ld\n", size) -proc isAccessible(a: TAllocator, p: pointer): bool {.inline.} = +proc isAccessible(a: TMemRegion, p: pointer): 
bool {.inline.} = result = Contains(a.chunkStarts, pageIndex(p)) proc contains[T](list, x: T): bool = @@ -313,7 +313,7 @@ proc contains[T](list, x: T): bool = if it == x: return true it = it.next -proc writeFreeList(a: TAllocator) = +proc writeFreeList(a: TMemRegion) = var it = a.freeChunksList c_fprintf(c_stdout, "freeChunksList: %p\n", it) while it != nil: @@ -322,23 +322,23 @@ proc writeFreeList(a: TAllocator) = it = it.next proc ListAdd[T](head: var T, c: T) {.inline.} = - assert(c notin head) - assert c.prev == nil - assert c.next == nil + sysAssert(c notin head) + sysAssert c.prev == nil + sysAssert c.next == nil c.next = head if head != nil: - assert head.prev == nil + sysAssert head.prev == nil head.prev = c head = c proc ListRemove[T](head: var T, c: T) {.inline.} = - assert(c in head) + sysAssert(c in head) if c == head: head = c.next - assert c.prev == nil + sysAssert c.prev == nil if head != nil: head.prev = nil else: - assert c.prev != nil + sysAssert c.prev != nil c.prev.next = c.next if c.next != nil: c.next.prev = c.prev c.next = nil @@ -350,22 +350,22 @@ proc isSmallChunk(c: PChunk): bool {.inline.} = proc chunkUnused(c: PChunk): bool {.inline.} = result = not c.used -proc updatePrevSize(a: var TAllocator, c: PBigChunk, +proc updatePrevSize(a: var TMemRegion, c: PBigChunk, prevSize: int) {.inline.} = var ri = cast[PChunk](cast[TAddress](c) +% c.size) - assert((cast[TAddress](ri) and PageMask) == 0) + sysAssert((cast[TAddress](ri) and PageMask) == 0) if isAccessible(a, ri): ri.prevSize = prevSize -proc freeBigChunk(a: var TAllocator, c: PBigChunk) = +proc freeBigChunk(a: var TMemRegion, c: PBigChunk) = var c = c - assert(c.size >= PageSize) + sysAssert(c.size >= PageSize) inc(a.freeMem, c.size) when coalescRight: var ri = cast[PChunk](cast[TAddress](c) +% c.size) - assert((cast[TAddress](ri) and PageMask) == 0) + sysAssert((cast[TAddress](ri) and PageMask) == 0) if isAccessible(a, ri) and chunkUnused(ri): - assert(not isSmallChunk(ri)) + 
sysAssert(not isSmallChunk(ri)) if not isSmallChunk(ri): ListRemove(a.freeChunksList, cast[PBigChunk](ri)) inc(c.size, ri.size) @@ -373,9 +373,9 @@ proc freeBigChunk(a: var TAllocator, c: PBigChunk) = when coalescLeft: if c.prevSize != 0: var le = cast[PChunk](cast[TAddress](c) -% c.prevSize) - assert((cast[TAddress](le) and PageMask) == 0) + sysAssert((cast[TAddress](le) and PageMask) == 0) if isAccessible(a, le) and chunkUnused(le): - assert(not isSmallChunk(le)) + sysAssert(not isSmallChunk(le)) if not isSmallChunk(le): ListRemove(a.freeChunksList, cast[PBigChunk](le)) inc(le.size, c.size) @@ -390,9 +390,9 @@ proc freeBigChunk(a: var TAllocator, c: PBigChunk) = else: freeOsChunks(a, c, c.size) -proc splitChunk(a: var TAllocator, c: PBigChunk, size: int) = +proc splitChunk(a: var TMemRegion, c: PBigChunk, size: int) = var rest = cast[PBigChunk](cast[TAddress](c) +% size) - assert(rest notin a.freeChunksList) + sysAssert(rest notin a.freeChunksList) rest.size = c.size - size rest.used = false rest.next = nil @@ -403,14 +403,14 @@ proc splitChunk(a: var TAllocator, c: PBigChunk, size: int) = incl(a, a.chunkStarts, pageIndex(rest)) ListAdd(a.freeChunksList, rest) -proc getBigChunk(a: var TAllocator, size: int): PBigChunk = +proc getBigChunk(a: var TMemRegion, size: int): PBigChunk = # use first fit for now: - assert((size and PageMask) == 0) - assert(size > 0) + sysAssert((size and PageMask) == 0) + sysAssert(size > 0) result = a.freeChunksList block search: while result != nil: - assert chunkUnused(result) + sysAssert chunkUnused(result) if result.size == size: ListRemove(a.freeChunksList, result) break search @@ -419,7 +419,7 @@ proc getBigChunk(a: var TAllocator, size: int): PBigChunk = splitChunk(a, result, size) break search result = result.next - assert result != a.freeChunksList + sysAssert result != a.freeChunksList if size < InitialMemoryRequest: result = requestOsChunks(a, InitialMemoryRequest) splitChunk(a, result, size) @@ -430,10 +430,10 @@ proc 
getBigChunk(a: var TAllocator, size: int): PBigChunk = incl(a, a.chunkStarts, pageIndex(result)) dec(a.freeMem, size) -proc getSmallChunk(a: var TAllocator): PSmallChunk = +proc getSmallChunk(a: var TMemRegion): PSmallChunk = var res = getBigChunk(a, PageSize) - assert res.prev == nil - assert res.next == nil + sysAssert res.prev == nil + sysAssert res.next == nil result = cast[PSmallChunk](res) # ----------------------------------------------------------------------------- @@ -442,9 +442,13 @@ proc getCellSize(p: pointer): int {.inline.} = var c = pageAddr(p) result = c.size -proc rawAlloc(a: var TAllocator, requestedSize: int): pointer = - assert(roundup(65, 8) == 72) - assert requestedSize >= sizeof(TFreeCell) +proc memSize(a: TMemRegion, p: pointer): int {.inline.} = + var c = pageAddr(p) + result = c.size + +proc rawAlloc(a: var TMemRegion, requestedSize: int): pointer = + sysAssert(roundup(65, 8) == 72) + sysAssert requestedSize >= sizeof(TFreeCell) var size = roundup(requestedSize, MemAlign) #c_fprintf(c_stdout, "alloc; size: %ld; %ld\n", requestedSize, size) if size <= SmallChunkSize-smallChunkOverhead(): @@ -454,7 +458,7 @@ proc rawAlloc(a: var TAllocator, requestedSize: int): pointer = if c == nil: c = getSmallChunk(a) c.freeList = nil - assert c.size == PageSize + sysAssert c.size == PageSize c.size = size c.acc = size c.free = SmallChunkSize - smallChunkOverhead() - size @@ -462,36 +466,40 @@ proc rawAlloc(a: var TAllocator, requestedSize: int): pointer = c.prev = nil ListAdd(a.freeSmallChunks[s], c) result = addr(c.data) - assert((cast[TAddress](result) and (MemAlign-1)) == 0) + sysAssert((cast[TAddress](result) and (MemAlign-1)) == 0) else: - assert c.next != c + sysAssert c.next != c #if c.size != size: # c_fprintf(c_stdout, "csize: %lld; size %lld\n", c.size, size) - assert c.size == size + sysAssert c.size == size if c.freeList == nil: - assert(c.acc + smallChunkOverhead() + size <= SmallChunkSize) + sysAssert(c.acc + smallChunkOverhead() + size <= 
SmallChunkSize) result = cast[pointer](cast[TAddress](addr(c.data)) +% c.acc) inc(c.acc, size) else: result = c.freeList - assert(c.freeList.zeroField == 0) + sysAssert(c.freeList.zeroField == 0) c.freeList = c.freeList.next dec(c.free, size) - assert((cast[TAddress](result) and (MemAlign-1)) == 0) + sysAssert((cast[TAddress](result) and (MemAlign-1)) == 0) if c.free < size: ListRemove(a.freeSmallChunks[s], c) else: size = roundup(requestedSize+bigChunkOverhead(), PageSize) # allocate a large block var c = getBigChunk(a, size) - assert c.prev == nil - assert c.next == nil - assert c.size == size + sysAssert c.prev == nil + sysAssert c.next == nil + sysAssert c.size == size result = addr(c.data) - assert((cast[TAddress](result) and (MemAlign-1)) == 0) - assert(isAccessible(a, result)) + sysAssert((cast[TAddress](result) and (MemAlign-1)) == 0) + sysAssert(isAccessible(a, result)) + +proc rawAlloc0(a: var TMemRegion, requestedSize: int): pointer = + result = rawAlloc(a, requestedSize) + zeroMem(result, requestedSize) -proc rawDealloc(a: var TAllocator, p: pointer) = +proc rawDealloc(a: var TMemRegion, p: pointer) = var c = pageAddr(p) if isSmallChunk(c): # `p` is within a small chunk: @@ -499,7 +507,7 @@ proc rawDealloc(a: var TAllocator, p: pointer) = var s = c.size var f = cast[ptr TFreeCell](p) #echo("setting to nil: ", $cast[TAddress](addr(f.zeroField))) - assert(f.zeroField != 0) + sysAssert(f.zeroField != 0) f.zeroField = 0 f.next = c.freeList c.freeList = f @@ -509,7 +517,7 @@ proc rawDealloc(a: var TAllocator, p: pointer) = s -% sizeof(TFreeCell)) # check if it is not in the freeSmallChunks[s] list: if c.free < s: - assert c notin a.freeSmallChunks[s div memAlign] + sysAssert c notin a.freeSmallChunks[s div memAlign] # add it to the freeSmallChunks[s] array: ListAdd(a.freeSmallChunks[s div memAlign], c) inc(c.free, s) @@ -525,7 +533,7 @@ proc rawDealloc(a: var TAllocator, p: pointer) = # free big chunk freeBigChunk(a, cast[PBigChunk](c)) -proc 
isAllocatedPtr(a: TAllocator, p: pointer): bool = +proc isAllocatedPtr(a: TMemRegion, p: pointer): bool = if isAccessible(a, p): var c = pageAddr(p) if not chunkUnused(c): @@ -539,40 +547,40 @@ proc isAllocatedPtr(a: TAllocator, p: pointer): bool = var c = cast[PBigChunk](c) result = p == addr(c.data) and cast[ptr TFreeCell](p).zeroField >% 1 -proc deallocOsPages(a: var TAllocator) = - # we free every 'ordinarily' allocated page by iterating over the page - # bits: - for p in elements(a.chunkStarts): +proc deallocOsPages(a: var TMemRegion) = + # we free every 'ordinarily' allocated page by iterating over the page bits: + for p in elements(a.chunkStarts): var page = cast[PChunk](p shl pageShift) var size = if page.size < PageSize: PageSize else: page.size osDeallocPages(page, size) # And then we free the pages that are in use for the page bits: llDeallocAll(a) -var - allocator {.rtlThreadVar.}: TAllocator +proc getFreeMem(a: TMemRegion): int {.inline.} = result = a.freeMem +proc getTotalMem(a: TMemRegion): int {.inline.} = result = a.currMem +proc getOccupiedMem(a: TMemRegion): int {.inline.} = + result = a.currMem - a.freeMem -proc deallocOsPages = deallocOsPages(allocator) +# ---------------------- thread memory region ------------------------------- -# ---------------------- interface to programs ------------------------------- +template InstantiateForRegion(allocator: expr) = + proc deallocOsPages = deallocOsPages(allocator) -when not defined(useNimRtl): - - proc unlockedAlloc(size: int): pointer {.inline.} = + proc unlockedAlloc(size: int): pointer = result = rawAlloc(allocator, size+sizeof(TFreeCell)) cast[ptr TFreeCell](result).zeroField = 1 # mark it as used - assert(not isAllocatedPtr(allocator, result)) + sysAssert(not isAllocatedPtr(allocator, result)) result = cast[pointer](cast[TAddress](result) +% sizeof(TFreeCell)) - proc unlockedAlloc0(size: int): pointer {.inline.} = + proc unlockedAlloc0(size: int): pointer = result = unlockedAlloc(size) 
zeroMem(result, size) - proc unlockedDealloc(p: pointer) {.inline.} = + proc unlockedDealloc(p: pointer) = var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell)) - assert(cast[ptr TFreeCell](x).zeroField == 1) + sysAssert(cast[ptr TFreeCell](x).zeroField == 1) rawDealloc(allocator, x) - assert(not isAllocatedPtr(allocator, x)) + sysAssert(not isAllocatedPtr(allocator, x)) proc alloc(size: int): pointer = when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock) @@ -601,37 +609,18 @@ when not defined(useNimRtl): elif p != nil: dealloc(p) - proc countFreeMem(): int = - # only used for assertions - var it = allocator.freeChunksList - while it != nil: - inc(result, it.size) - it = it.next + when false: + proc countFreeMem(): int = + # only used for assertions + var it = allocator.freeChunksList + while it != nil: + inc(result, it.size) + it = it.next proc getFreeMem(): int = result = allocator.freeMem - #assert(result == countFreeMem()) + #sysAssert(result == countFreeMem()) proc getTotalMem(): int = return allocator.currMem proc getOccupiedMem(): int = return getTotalMem() - getFreeMem() -when isMainModule: - const iterations = 4000_000 - incl(allocator.chunkStarts, 11) - assert 11 in allocator.chunkStarts - excl(allocator.chunkStarts, 11) - assert 11 notin allocator.chunkStarts - var p: array [1..iterations, pointer] - for i in 7..7: - var x = i * 8 - for j in 1.. 
iterations: - p[j] = alloc(allocator, x) - for j in 1..iterations: - assert isAllocatedPtr(allocator, p[j]) - echo($i, " used memory: ", $(allocator.currMem)) - for j in countdown(iterations, 1): - #echo("j: ", $j) - dealloc(allocator, p[j]) - assert(not isAllocatedPtr(allocator, p[j])) - echo($i, " after freeing: ", $(allocator.currMem)) - diff --git a/lib/system/assign.nim b/lib/system/assign.nim index aa5cd3af3..33bfa15f3 100755 --- a/lib/system/assign.nim +++ b/lib/system/assign.nim @@ -24,7 +24,7 @@ proc genericAssignAux(dest, src: Pointer, n: ptr TNimNode, shallow: bool) = n.typ.size) var m = selectBranch(src, n) if m != nil: genericAssignAux(dest, src, m, shallow) - of nkNone: assert(false) + of nkNone: sysAssert(false) #else: # echo "ugh memory corruption! ", n.kind # quit 1 @@ -33,7 +33,7 @@ proc genericAssignAux(dest, src: Pointer, mt: PNimType, shallow: bool) = var d = cast[TAddress](dest) s = cast[TAddress](src) - assert(mt != nil) + sysAssert(mt != nil) case mt.Kind of tyString: var x = cast[ppointer](dest) @@ -50,7 +50,7 @@ proc genericAssignAux(dest, src: Pointer, mt: PNimType, shallow: bool) = # this can happen! 
nil sequences are allowed unsureAsgnRef(x, s2) return - assert(dest != nil) + sysAssert(dest != nil) unsureAsgnRef(x, newObj(mt, seq.len * mt.base.size + GenericSeqSize)) var dst = cast[taddress](cast[ppointer](dest)[]) for i in 0..seq.len-1: @@ -101,7 +101,7 @@ proc objectInit(dest: Pointer, typ: PNimType) {.compilerProc.} proc objectInitAux(dest: Pointer, n: ptr TNimNode) = var d = cast[TAddress](dest) case n.kind - of nkNone: assert(false) + of nkNone: sysAssert(false) of nkSLot: objectInit(cast[pointer](d +% n.offset), n.typ) of nkList: for i in 0..n.len-1: @@ -134,7 +134,7 @@ proc genericReset(dest: Pointer, mt: PNimType) {.compilerProc.} proc genericResetAux(dest: Pointer, n: ptr TNimNode) = var d = cast[TAddress](dest) case n.kind - of nkNone: assert(false) + of nkNone: sysAssert(false) of nkSlot: genericReset(cast[pointer](d +% n.offset), n.typ) of nkList: for i in 0..n.len-1: genericResetAux(dest, n.sons[i]) @@ -145,7 +145,7 @@ proc genericResetAux(dest: Pointer, n: ptr TNimNode) = proc genericReset(dest: Pointer, mt: PNimType) = var d = cast[TAddress](dest) - assert(mt != nil) + sysAssert(mt != nil) case mt.Kind of tyString, tyRef, tySequence: unsureAsgnRef(cast[ppointer](dest), nil) @@ -173,4 +173,4 @@ proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int, if newBranch != oldBranch and oldDiscVal != 0: raise newException(EInvalidField, "assignment to discriminant changes object branch") - + diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index 31c25c5af..64f8e03e0 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -22,14 +22,14 @@ else: inc(p, val) result = p -proc atomicInc(memLoc: var int, x: int): int = +proc atomicInc(memLoc: var int, x: int = 1): int = when hasThreadSupport: result = sync_add_and_fetch(memLoc, x) else: inc(memLoc, x) result = memLoc -proc atomicDec(memLoc: var int, x: int): int = +proc atomicDec(memLoc: var int, x: int = 1): int = when hasThreadSupport: when defined(sync_sub_and_fetch): result = 
sync_sub_and_fetch(memLoc, x) diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim index e262d4b77..7502636fa 100755 --- a/lib/system/cellsets.nim +++ b/lib/system/cellsets.nim @@ -102,9 +102,9 @@ proc CellSetGet(t: TCellSet, key: TAddress): PPageDesc = proc CellSetRawInsert(t: TCellSet, data: PPageDescArray, desc: PPageDesc) = var h = cast[int](desc.key) and t.max while data[h] != nil: - assert(data[h] != desc) + sysAssert(data[h] != desc) h = nextTry(h, t.max) - assert(data[h] == nil) + sysAssert(data[h] == nil) data[h] = desc proc CellSetEnlarge(t: var TCellSet) = @@ -130,7 +130,7 @@ proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc = inc(t.counter) h = cast[int](key) and t.max while t.data[h] != nil: h = nextTry(h, t.max) - assert(t.data[h] == nil) + sysAssert(t.data[h] == nil) # the new page descriptor goes into result result = cast[PPageDesc](unlockedAlloc0(sizeof(TPageDesc))) result.next = t.head diff --git a/lib/system/ecmasys.nim b/lib/system/ecmasys.nim index e2ecb370a..7f91feb6b 100755 --- a/lib/system/ecmasys.nim +++ b/lib/system/ecmasys.nim @@ -408,7 +408,7 @@ proc NimCopy(x: pointer, ti: PNimType): pointer {.compilerproc.} proc NimCopyAux(dest, src: Pointer, n: ptr TNimNode) {.exportc.} = case n.kind - of nkNone: assert(false) + of nkNone: sysAssert(false) of nkSlot: asm "`dest`[`n`.offset] = NimCopy(`src`[`n`.offset], `n`.typ);" of nkList: diff --git a/lib/system/gc.nim b/lib/system/gc.nim index 29fd2eae5..d1fa98514 100755 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -53,17 +53,20 @@ type TGcHeap {.final, pure.} = object # this contains the zero count and # non-zero count table + stackBottom: pointer + cycleThreshold: int zct: TCellSeq # the zero count table decStack: TCellSeq # cells in the stack that are to decref again cycleRoots: TCellSet tempStack: TCellSeq # temporary stack for recursion elimination recGcLock: int # prevent recursion via finalizers; no thread lock + region: TMemRegion # garbage collected region stat: 
TGcStat var - stackBottom {.rtlThreadVar.}: pointer gch {.rtlThreadVar.}: TGcHeap - cycleThreshold {.rtlThreadVar.}: int + +InstantiateForRegion(gch.region) proc acquire(gch: var TGcHeap) {.inline.} = when hasThreadSupport and hasSharedHeap: @@ -124,30 +127,30 @@ when traceGC: of csAllocated: if c in states[csAllocated]: writeCell("attempt to alloc an already allocated cell", c) - assert(false) + sysAssert(false) excl(states[csCycFreed], c) excl(states[csZctFreed], c) of csZctFreed: if c in states[csZctFreed]: writeCell("attempt to free zct cell twice", c) - assert(false) + sysAssert(false) if c in states[csCycFreed]: writeCell("attempt to free with zct, but already freed with cyc", c) - assert(false) + sysAssert(false) if c notin states[csAllocated]: writeCell("attempt to free not an allocated cell", c) - assert(false) + sysAssert(false) excl(states[csAllocated], c) of csCycFreed: if c notin states[csAllocated]: writeCell("attempt to free a not allocated cell", c) - assert(false) + sysAssert(false) if c in states[csCycFreed]: writeCell("attempt to free cyc cell twice", c) - assert(false) + sysAssert(false) if c in states[csZctFreed]: writeCell("attempt to free with cyc, but already freed with zct", c) - assert(false) + sysAssert(false) excl(states[csAllocated], c) incl(states[state], c) @@ -216,7 +219,7 @@ proc decRef(c: PCell) {.inline.} = when stressGC: if c.refcount <% rcIncrement: writeCell("broken cell", c) - assert(c.refcount >=% rcIncrement) + sysAssert(c.refcount >=% rcIncrement) #if c.refcount <% rcIncrement: quit("leck mich") if --c.refcount: rtlAddZCT(c) @@ -233,7 +236,7 @@ proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p)) proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} = # the code generator calls this proc! - assert(not isOnStack(dest)) + sysAssert(not isOnStack(dest)) # BUGFIX: first incRef then decRef! 
if src != nil: incRef(usrToCell(src)) if dest[] != nil: decRef(usrToCell(dest[])) @@ -267,7 +270,7 @@ proc initGC() = when not defined(useNimRtl): when traceGC: for i in low(TCellState)..high(TCellState): Init(states[i]) - cycleThreshold = InitialCycleThreshold + gch.cycleThreshold = InitialCycleThreshold gch.stat.stackScans = 0 gch.stat.cycleCollections = 0 gch.stat.maxThreshold = 0 @@ -289,7 +292,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = of nkCase: var m = selectBranch(dest, n) if m != nil: forAllSlotsAux(dest, m, op) - of nkNone: assert(false) + of nkNone: sysAssert(false) proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) = var d = cast[TAddress](dest) @@ -306,9 +309,9 @@ proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) = else: nil proc forAllChildren(cell: PCell, op: TWalkOp) = - assert(cell != nil) - assert(cell.typ != nil) - assert cell.typ.kind in {tyRef, tySequence, tyString} + sysAssert(cell != nil) + sysAssert(cell.typ != nil) + sysAssert cell.typ.kind in {tyRef, tySequence, tyString} case cell.typ.Kind of tyRef: # common case forAllChildrenAux(cellToUsr(cell), cell.typ.base, op) @@ -321,12 +324,7 @@ proc forAllChildren(cell: PCell, op: TWalkOp) = GenericSeqSize), cell.typ.base, op) else: nil -proc checkCollection {.inline.} = - # checks if a collection should be done - if gch.recGcLock == 0: - collectCT(gch) - -proc addNewObjToZCT(res: PCell) {.inline.} = +proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} = # we check the last 8 entries (cache line) for a slot that could be reused. # In 63% of all cases we succeed here! But we have to optimize the heck # out of this small linear search so that ``newObj`` is not slowed down. 
@@ -370,14 +368,14 @@ proc addNewObjToZCT(res: PCell) {.inline.} = return add(gch.zct, res) -proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = +proc newObj(typ: PNimType, size: int, gch: var TGcHeap): pointer = # generates a new object and sets its reference counter to 0 acquire(gch) - assert(typ.kind in {tyRef, tyString, tySequence}) - checkCollection() - var res = cast[PCell](rawAlloc(allocator, size + sizeof(TCell))) + sysAssert(typ.kind in {tyRef, tyString, tySequence}) + collectCT(gch) + var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell))) zeroMem(res, size+sizeof(TCell)) - assert((cast[TAddress](res) and (MemAlign-1)) == 0) + sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0) # now it is buffered in the ZCT res.typ = typ when debugGC and not hasThreadSupport: @@ -385,13 +383,16 @@ proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = res.filename = framePtr.prev.filename res.line = framePtr.prev.line res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT - assert(isAllocatedPtr(allocator, res)) + sysAssert(isAllocatedPtr(gch.region, res)) # its refcount is zero, so add it to the ZCT: - addNewObjToZCT(res) + addNewObjToZCT(res, gch) when logGC: writeCell("new cell", res) gcTrace(res, csAllocated) release(gch) - result = cellToUsr(res) + result = cellToUsr(res) + +proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = + result = newObj(typ, size, gch) proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} = # `newObj` already uses locks, so no need for them here. 
@@ -399,23 +400,22 @@ proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} = cast[PGenericSeq](result).len = len cast[PGenericSeq](result).space = len -proc growObj(old: pointer, newsize: int): pointer {.rtl.} = +proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer = acquire(gch) - checkCollection() + collectCT(gch) var ol = usrToCell(old) - assert(ol.typ != nil) - assert(ol.typ.kind in {tyString, tySequence}) - var res = cast[PCell](rawAlloc(allocator, newsize + sizeof(TCell))) + sysAssert(ol.typ != nil) + sysAssert(ol.typ.kind in {tyString, tySequence}) + var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell))) var elemSize = 1 - if ol.typ.kind != tyString: - elemSize = ol.typ.base.size + if ol.typ.kind != tyString: elemSize = ol.typ.base.size var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize copyMem(res, ol, oldsize + sizeof(TCell)) zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)), newsize-oldsize) - assert((cast[TAddress](res) and (MemAlign-1)) == 0) - assert(res.refcount shr rcShift <=% 1) + sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0) + sysAssert(res.refcount shr rcShift <=% 1) #if res.refcount <% rcIncrement: # add(gch.zct, res) #else: # XXX: what to do here? 
@@ -434,29 +434,32 @@ proc growObj(old: pointer, newsize: int): pointer {.rtl.} = writeCell("growObj new cell", res) gcTrace(ol, csZctFreed) gcTrace(res, csAllocated) - when reallyDealloc: rawDealloc(allocator, ol) + when reallyDealloc: rawDealloc(gch.region, ol) else: - assert(ol.typ != nil) + sysAssert(ol.typ != nil) zeroMem(ol, sizeof(TCell)) release(gch) result = cellToUsr(res) +proc growObj(old: pointer, newsize: int): pointer {.rtl.} = + result = growObj(old, newsize, gch) + # ---------------- cycle collector ------------------------------------------- proc doOperation(p: pointer, op: TWalkOp) = if p == nil: return var c: PCell = usrToCell(p) - assert(c != nil) + sysAssert(c != nil) case op # faster than function pointers because of easy prediction of waZctDecRef: - assert(c.refcount >=% rcIncrement) + sysAssert(c.refcount >=% rcIncrement) c.refcount = c.refcount -% rcIncrement when logGC: writeCell("decref (from doOperation)", c) if c.refcount <% rcIncrement: addZCT(gch.zct, c) of waPush: add(gch.tempStack, c) of waCycleDecRef: - assert(c.refcount >=% rcIncrement) + sysAssert(c.refcount >=% rcIncrement) c.refcount = c.refcount -% rcIncrement # we now use a much simpler and non-recursive algorithm for cycle removal @@ -496,20 +499,20 @@ proc collectCycles(gch: var TGcHeap) = prepareDealloc(c) gcTrace(c, csCycFreed) when logGC: writeCell("cycle collector dealloc cell", c) - when reallyDealloc: rawDealloc(allocator, c) + when reallyDealloc: rawDealloc(gch.region, c) else: - assert(c.typ != nil) + sysAssert(c.typ != nil) zeroMem(c, sizeof(TCell)) Deinit(gch.cycleRoots) Init(gch.cycleRoots) -proc gcMark(p: pointer) {.inline.} = +proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} = # the addresses are not as cells on the stack, so turn them to cells: var cell = usrToCell(p) var c = cast[TAddress](cell) if c >% PageSize and (c and (MemAlign-1)) == 0: # fast check: does it look like a cell? 
- if isAllocatedPtr(allocator, cell): + if isAllocatedPtr(gch.region, cell): # mark the cell: cell.refcount = cell.refcount +% rcIncrement add(gch.decStack, cell) @@ -520,13 +523,13 @@ proc markThreadStacks(gch: var TGcHeap) = var it = threadList while it != nil: # mark registers: - for i in 0 .. high(it.registers): gcMark(it.registers[i]) + for i in 0 .. high(it.registers): gcMark(gch, it.registers[i]) var sp = cast[TAddress](it.stackBottom) var max = cast[TAddress](it.stackTop) # XXX stack direction? # XXX unroll this loop: while sp <=% max: - gcMark(cast[ppointer](sp)[]) + gcMark(gch, cast[ppointer](sp)[]) sp = sp +% sizeof(pointer) it = it.next @@ -545,24 +548,24 @@ when not defined(useNimRtl): proc setStackBottom(theStackBottom: pointer) = #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom) # the first init must be the one that defines the stack bottom: - if stackBottom == nil: stackBottom = theStackBottom + if gch.stackBottom == nil: gch.stackBottom = theStackBottom else: var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2 - var b = cast[TAddress](stackBottom) + var b = cast[TAddress](gch.stackBottom) when stackIncreases: - stackBottom = cast[pointer](min(a, b)) + gch.stackBottom = cast[pointer](min(a, b)) else: - stackBottom = cast[pointer](max(a, b)) + gch.stackBottom = cast[pointer](max(a, b)) proc stackSize(): int {.noinline.} = var stackTop {.volatile.}: pointer - result = abs(cast[int](addr(stackTop)) - cast[int](stackBottom)) + result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom)) when defined(sparc): # For SPARC architecture. proc isOnStack(p: pointer): bool = var stackTop {.volatile.}: pointer stackTop = addr(stackTop) - var b = cast[TAddress](stackBottom) + var b = cast[TAddress](gch.stackBottom) var a = cast[TAddress](stackTop) var x = cast[TAddress](p) result = a <=% x and x <=% b @@ -574,13 +577,13 @@ when defined(sparc): # For SPARC architecture. asm """"ta 0x3 ! 
ST_FLUSH_WINDOWS\n" """ var - max = stackBottom + max = gch.stackBottom sp: PPointer stackTop: array[0..1, pointer] sp = addr(stackTop[0]) # Addresses decrease as the stack grows. while sp <= max: - gcMark(sp[]) + gcMark(gch, sp[]) sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer)) elif defined(ELATE): @@ -593,7 +596,7 @@ elif stackIncreases: proc isOnStack(p: pointer): bool = var stackTop {.volatile.}: pointer stackTop = addr(stackTop) - var a = cast[TAddress](stackBottom) + var a = cast[TAddress](gch.stackBottom) var b = cast[TAddress](stackTop) var x = cast[TAddress](p) result = a <=% x and x <=% b @@ -606,12 +609,12 @@ elif stackIncreases: proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = var registers: C_JmpBuf if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. - var max = cast[TAddress](stackBottom) + var max = cast[TAddress](gch.stackBottom) var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer) # sp will traverse the JMP_BUF as well (jmp_buf size is added, # otherwise sp would be below the registers structure). while sp >=% max: - gcMark(cast[ppointer](sp)[]) + gcMark(gch, cast[ppointer](sp)[]) sp = sp -% sizeof(pointer) else: @@ -621,7 +624,7 @@ else: proc isOnStack(p: pointer): bool = var stackTop {.volatile.}: pointer stackTop = addr(stackTop) - var b = cast[TAddress](stackBottom) + var b = cast[TAddress](gch.stackBottom) var a = cast[TAddress](stackTop) var x = cast[TAddress](p) result = a <=% x and x <=% b @@ -633,22 +636,22 @@ else: type PStackSlice = ptr array [0..7, pointer] var registers: C_JmpBuf if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. 
- var max = cast[TAddress](stackBottom) + var max = cast[TAddress](gch.stackBottom) var sp = cast[TAddress](addr(registers)) # loop unrolled: while sp <% max - 8*sizeof(pointer): - gcMark(cast[PStackSlice](sp)[0]) - gcMark(cast[PStackSlice](sp)[1]) - gcMark(cast[PStackSlice](sp)[2]) - gcMark(cast[PStackSlice](sp)[3]) - gcMark(cast[PStackSlice](sp)[4]) - gcMark(cast[PStackSlice](sp)[5]) - gcMark(cast[PStackSlice](sp)[6]) - gcMark(cast[PStackSlice](sp)[7]) + gcMark(gch, cast[PStackSlice](sp)[0]) + gcMark(gch, cast[PStackSlice](sp)[1]) + gcMark(gch, cast[PStackSlice](sp)[2]) + gcMark(gch, cast[PStackSlice](sp)[3]) + gcMark(gch, cast[PStackSlice](sp)[4]) + gcMark(gch, cast[PStackSlice](sp)[5]) + gcMark(gch, cast[PStackSlice](sp)[6]) + gcMark(gch, cast[PStackSlice](sp)[7]) sp = sp +% sizeof(pointer)*8 # last few entries: while sp <=% max: - gcMark(cast[ppointer](sp)[]) + gcMark(gch, cast[ppointer](sp)[]) sp = sp +% sizeof(pointer) # ---------------------------------------------------------------------------- @@ -664,7 +667,7 @@ proc CollectZCT(gch: var TGcHeap) = while L[] > 0: var c = gch.zct.d[0] # remove from ZCT: - assert((c.refcount and colorMask) == rcZct) + sysAssert((c.refcount and colorMask) == rcZct) c.refcount = c.refcount and not colorMask gch.zct.d[0] = gch.zct.d[L[] - 1] dec(L[]) @@ -683,41 +686,42 @@ proc CollectZCT(gch: var TGcHeap) = # access invalid memory. 
This is done by prepareDealloc(): prepareDealloc(c) forAllChildren(c, waZctDecRef) - when reallyDealloc: rawDealloc(allocator, c) + when reallyDealloc: rawDealloc(gch.region, c) else: - assert(c.typ != nil) + sysAssert(c.typ != nil) zeroMem(c, sizeof(TCell)) proc unmarkStackAndRegisters(gch: var TGcHeap) = var d = gch.decStack.d for i in 0..gch.decStack.len-1: - assert isAllocatedPtr(allocator, d[i]) + sysAssert isAllocatedPtr(allocator, d[i]) # decRef(d[i]) inlined: cannot create a cycle and must not acquire lock var c = d[i] # XXX no need for an atomic dec here: if --c.refcount: addZCT(gch.zct, c) - assert c.typ != nil + sysAssert c.typ != nil gch.decStack.len = 0 proc collectCT(gch: var TGcHeap) = - if gch.zct.len >= ZctThreshold or (cycleGC and - getOccupiedMem() >= cycleThreshold) or stressGC: + if (gch.zct.len >= ZctThreshold or (cycleGC and + getOccupiedMem(gch.region) >= gch.cycleThreshold) or stressGC) and + gch.recGcLock == 0: gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize()) - assert(gch.decStack.len == 0) + sysAssert(gch.decStack.len == 0) markStackAndRegisters(gch) markThreadStacks(gch) gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len) inc(gch.stat.stackScans) collectZCT(gch) when cycleGC: - if getOccupiedMem() >= cycleThreshold or stressGC: + if getOccupiedMem() >= gch.cycleThreshold or stressGC: collectCycles(gch) collectZCT(gch) inc(gch.stat.cycleCollections) - cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * - cycleIncrease) - gch.stat.maxThreshold = max(gch.stat.maxThreshold, cycleThreshold) + gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * + cycleIncrease) + gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold) unmarkStackAndRegisters(gch) when not defined(useNimRtl): @@ -741,18 +745,18 @@ when not defined(useNimRtl): of gcOptimizeTime: nil proc GC_enableMarkAndSweep() = - cycleThreshold = InitialCycleThreshold + gch.cycleThreshold = InitialCycleThreshold proc 
GC_disableMarkAndSweep() = - cycleThreshold = high(cycleThreshold)-1 + gch.cycleThreshold = high(gch.cycleThreshold)-1 # set to the max value to suppress the cycle detector proc GC_fullCollect() = acquire(gch) - var oldThreshold = cycleThreshold - cycleThreshold = 0 # forces cycle collection + var oldThreshold = gch.cycleThreshold + gch.cycleThreshold = 0 # forces cycle collection collectCT(gch) - cycleThreshold = oldThreshold + gch.cycleThreshold = oldThreshold release(gch) proc GC_getStatistics(): string = diff --git a/lib/system/inboxes.nim b/lib/system/inboxes.nim new file mode 100644 index 000000000..8f683f612 --- /dev/null +++ b/lib/system/inboxes.nim @@ -0,0 +1,203 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Message passing for threads. The current implementation is slow and does +## not work with cyclic data structures. But hey, it's better than nothing. + +type + pbytes = ptr array[0.. 
0xffff, byte] + TInbox {.pure, final.} = object ## msg queue for a thread + rd, wr, count, mask: int + data: pbytes + lock: TSysLock + cond: TSysCond + elemType: PNimType + region: TMemRegion + PInbox = ptr TInbox + TLoadStoreMode = enum mStore, mLoad + +proc initInbox(p: pointer) = + var inbox = cast[PInbox](p) + initSysLock(inbox.lock) + initSysCond(inbox.cond) + inbox.mask = -1 + +proc freeInbox(p: pointer) = + var inbox = cast[PInbox](p) + deallocOsPages(inbox.region) + deinitSys(inbox.lock) + deinitSysCond(inbox.cond) + +proc storeAux(dest, src: Pointer, mt: PNimType, t: PInbox, mode: TLoadStoreMode) +proc storeAux(dest, src: Pointer, n: ptr TNimNode, t: PInbox, + mode: TLoadStoreMode) = + var + d = cast[TAddress](dest) + s = cast[TAddress](src) + case n.kind + of nkSlot: storeAux(cast[pointer](d +% n.offset), + cast[pointer](s +% n.offset), n.typ, t, mode) + of nkList: + for i in 0..n.len-1: storeAux(dest, src, n.sons[i], t, mode) + of nkCase: + copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset), + n.typ.size) + var m = selectBranch(src, n) + if m != nil: storeAux(dest, src, m, t, mode) + of nkNone: sysAssert(false) + +proc storeAux(dest, src: Pointer, mt: PNimType, t: PInbox, + mode: TLoadStoreMode) = + var + d = cast[TAddress](dest) + s = cast[TAddress](src) + sysAssert(mt != nil) + case mt.Kind + of tyString: + if mode == mStore: + var x = cast[ppointer](dest) + var s2 = cast[ppointer](s)[] + if s2 == nil: + x[] = nil + else: + var ss = cast[NimString](s2) + var ns = cast[NimString](rawAlloc(t.region, ss.len+1 + GenericSeqSize)) + copyMem(ns, ss, ss.len+1 + GenericSeqSize) + x[] = ns + else: + var x = cast[ppointer](dest) + var s2 = cast[ppointer](s)[] + if s2 == nil: + unsureAsgnRef(x, s2) + else: + unsureAsgnRef(x, copyString(cast[NimString](s2))) + rawDealloc(t.region, s2) + of tySequence: + var s2 = cast[ppointer](src)[] + var seq = cast[PGenericSeq](s2) + var x = cast[ppointer](dest) + if s2 == nil: + if mode == mStore: + x[] = nil + 
else: + unsureAsgnRef(x, nil) + else: + sysAssert(dest != nil) + if mode == mStore: + x[] = rawAlloc(t.region, seq.len *% mt.base.size +% GenericSeqSize) + else: + unsureAsgnRef(x, newObj(mt, seq.len * mt.base.size + GenericSeqSize)) + var dst = cast[taddress](cast[ppointer](dest)[]) + for i in 0..seq.len-1: + storeAux( + cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize), + cast[pointer](cast[TAddress](s2) +% i *% mt.base.size +% + GenericSeqSize), + mt.Base, t, mode) + var dstseq = cast[PGenericSeq](dst) + dstseq.len = seq.len + dstseq.space = seq.len + if mode != mStore: rawDealloc(t.region, s2) + of tyObject: + # copy type field: + var pint = cast[ptr PNimType](dest) + # XXX use dynamic type here! + pint[] = mt + storeAux(dest, src, mt.node, t, mode) + of tyTuple, tyPureObject: + storeAux(dest, src, mt.node, t, mode) + of tyArray, tyArrayConstr: + for i in 0..(mt.size div mt.base.size)-1: + storeAux(cast[pointer](d +% i*% mt.base.size), + cast[pointer](s +% i*% mt.base.size), mt.base, t, mode) + of tyRef: + var s = cast[ppointer](src)[] + var x = cast[ppointer](dest) + if s == nil: + if mode == mStore: + x[] = nil + else: + unsureAsgnRef(x, nil) + else: + if mode == mStore: + x[] = rawAlloc(t.region, mt.base.size) + else: + # XXX we should use the dynamic type here too, but that is not stored in + # the inbox at all --> use source[]'s object type? but how? we need a + # tyRef to the object! + var obj = newObj(mt.base, mt.base.size) + unsureAsgnRef(x, obj) + storeAux(x[], s, mt.base, t, mode) + if mode != mStore: rawDealloc(t.region, s) + else: + copyMem(dest, src, mt.size) # copy raw bits + +proc rawSend(q: PInbox, data: pointer, typ: PNimType) = + ## adds an `item` to the end of the queue `q`. 
+ var cap = q.mask+1 + if q.count >= cap: + # start with capicity for 2 entries in the queue: + if cap == 0: cap = 1 + var n = cast[pbytes](rawAlloc0(q.region, cap*2*typ.size)) + var z = 0 + var i = q.rd + var c = q.count + while c > 0: + dec c + copyMem(addr(n[z*typ.size]), addr(q.data[i*typ.size]), typ.size) + i = (i + 1) and q.mask + inc z + if q.data != nil: rawDealloc(q.region, q.data) + q.data = n + q.mask = cap*2 - 1 + q.wr = q.count + q.rd = 0 + #echo "came here" + storeAux(addr(q.data[q.wr * typ.size]), data, typ, q, mStore) + inc q.count + q.wr = (q.wr + 1) and q.mask + +proc rawRecv(q: PInbox, data: pointer, typ: PNimType) = + assert q.count > 0 + dec q.count + storeAux(data, addr(q.data[q.rd * typ.size]), typ, q, mLoad) + q.rd = (q.rd + 1) and q.mask + +template lockInbox(q: expr, action: stmt) = + acquireSys(q.lock) + action + releaseSys(q.lock) + +proc send*[TMsg](receiver: var TThread[TMsg], msg: TMsg) = + ## sends a message to a thread. `msg` is deeply copied. + var q = cast[PInbox](getInBoxMem(receiver)) + acquireSys(q.lock) + var m: TMsg + shallowCopy(m, msg) + rawSend(q, addr(m), cast[PNimType](getTypeInfo(msg))) + releaseSys(q.lock) + SignalSysCond(q.cond) + +proc recv*[TMsg](): TMsg = + ## receives a message from its internal message queue. This blocks until + ## a message has arrived! You may use ``peek`` to avoid the blocking. + var q = cast[PInbox](getInBoxMem()) + acquireSys(q.lock) + while q.count <= 0: + WaitSysCond(q.cond, q.lock) + rawRecv(q, addr(result), cast[PNimType](getTypeInfo(result))) + releaseSys(q.lock) + +proc peek*(): int = + ## returns the current number of messages in the inbox. 
+ var q = cast[PInbox](getInBoxMem()) + lockInbox(q): + result = q.count + + diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim index d450c520e..e5efff615 100755 --- a/lib/system/mmdisp.nim +++ b/lib/system/mmdisp.nim @@ -62,11 +62,10 @@ when defined(boehmgc): const boehmLib = "boehmgc.dll" elif defined(macosx): const boehmLib = "libgc.dylib" - - proc boehmGCinit {.importc: "GC_init", dynlib: boehmLib.} else: const boehmLib = "/usr/lib/libgc.so.1" - + + proc boehmGCinit {.importc: "GC_init", dynlib: boehmLib.} proc boehmGC_disable {.importc: "GC_disable", dynlib: boehmLib.} proc boehmGC_enable {.importc: "GC_enable", dynlib: boehmLib.} proc boehmGCincremental {. @@ -177,12 +176,20 @@ elif defined(nogc): proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} = dest[] = src + var allocator {.rtlThreadVar.}: TMemRegion + InstantiateForRegion(allocator) + include "system/cellsets" else: include "system/alloc" + + proc unlockedAlloc(size: int): pointer {.inline.} + proc unlockedAlloc0(size: int): pointer {.inline.} + proc unlockedDealloc(p: pointer) {.inline.} + include "system/cellsets" - assert(sizeof(TCell) == sizeof(TFreeCell)) + sysAssert(sizeof(TCell) == sizeof(TFreeCell)) include "system/gc" {.pop.} diff --git a/lib/system/repr.nim b/lib/system/repr.nim index 256313ebd..6b940ccb4 100755 --- a/lib/system/repr.nim +++ b/lib/system/repr.nim @@ -158,7 +158,7 @@ when not defined(useNimRtl): proc reprRecordAux(result: var string, p: pointer, n: ptr TNimNode, cl: var TReprClosure) = case n.kind - of nkNone: assert(false) + of nkNone: sysAssert(false) of nkSlot: add result, $n.name add result, " = " @@ -206,7 +206,7 @@ when not defined(useNimRtl): var t = cast[ptr PNimType](p)[] reprRecord(result, p, t, cl) of tyRef, tyPtr: - assert(p != nil) + sysAssert(p != nil) if cast[ppointer](p)[] == nil: add result, "nil" else: reprRef(result, cast[ppointer](p)[], typ, cl) of tySequence: diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim new 
file mode 100644 index 000000000..c91e83dcd --- /dev/null +++ b/lib/system/syslocks.nim @@ -0,0 +1,101 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Low level system locks and condition vars. + +when defined(Windows): + type + THandle = int + TSysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi + DebugInfo: pointer + LockCount: int32 + RecursionCount: int32 + OwningThread: int + LockSemaphore: int + Reserved: int32 + + TSysCond = THandle + + proc InitSysLock(L: var TSysLock) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "InitializeCriticalSection".} + ## Initializes the lock `L`. + + proc TryAcquireSysAux(L: var TSysLock): int32 {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "TryEnterCriticalSection".} + ## Tries to acquire the lock `L`. + + proc TryAcquireSys(L: var TSysLock): bool {.inline.} = + result = TryAcquireSysAux(L) != 0'i32 + + proc AcquireSys(L: var TSysLock) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "EnterCriticalSection".} + ## Acquires the lock `L`. + + proc ReleaseSys(L: var TSysLock) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "LeaveCriticalSection".} + ## Releases the lock `L`. + + proc DeinitSys(L: var TSysLock) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "DeleteCriticalSection".} + + proc CreateEvent(lpEventAttributes: pointer, + bManualReset, bInitialState: int32, + lpName: cstring): TSysCond {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "CreateEvent".} + + proc CloseHandle(hObject: THandle) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "CloseHandle".} + proc WaitForSingleObject(hHandle: THandle, dwMilliseconds: int32): int32 {. 
+ stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".} + + proc SignalSysCond(hEvent: TSysCond) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "SetEvent".} + + proc InitSysCond(cond: var TSysCond) {.inline.} = + cond = CreateEvent(nil, 0'i32, 0'i32, nil) + proc DeinitSysCond(cond: var TSysCond) {.inline.} = + CloseHandle(cond) + proc WaitSysCond(cond: var TSysCond, lock: var TSysLock) = + releaseSys(lock) + discard WaitForSingleObject(cond, -1'i32) + acquireSys(lock) + +else: + type + TSysLock {.importc: "pthread_mutex_t", pure, final, + header: "<sys/types.h>".} = object + TSysCond {.importc: "pthread_cond_t", pure, final, + header: "<sys/types.h>".} = object + + proc InitSysLock(L: var TSysLock, attr: pointer = nil) {. + importc: "pthread_mutex_init", header: "<pthread.h>", noSideEffect.} + + proc AcquireSys(L: var TSysLock) {.noSideEffect, + importc: "pthread_mutex_lock", header: "<pthread.h>".} + proc TryAcquireSysAux(L: var TSysLock): cint {.noSideEffect, + importc: "pthread_mutex_trylock", header: "<pthread.h>".} + + proc TryAcquireSys(L: var TSysLock): bool {.inline.} = + result = TryAcquireSysAux(L) == 0'i32 + + proc ReleaseSys(L: var TSysLock) {.noSideEffect, + importc: "pthread_mutex_unlock", header: "<pthread.h>".} + proc DeinitSys(L: var TSysLock) {. + importc: "pthread_mutex_destroy", header: "<pthread.h>".} + + proc InitSysCond(cond: var TSysCond, cond_attr: pointer = nil) {. + importc: "pthread_cond_init", header: "<pthread.h>".} + proc WaitSysCond(cond: var TSysCond, lock: var TSysLock) {. + importc: "pthread_cond_wait", header: "<pthread.h>".} + proc SignalSysCond(cond: var TSysCond) {. + importc: "pthread_cond_signal", header: "<pthread.h>".} + + proc DeinitSysCond(cond: var TSysCond) {. 
+ importc: "pthread_cond_destroy", header: "<pthread.h>".} + diff --git a/lib/system/threads.nim b/lib/system/threads.nim index 86a6a5691..9bb67863b 100755 --- a/lib/system/threads.nim +++ b/lib/system/threads.nim @@ -25,8 +25,8 @@ ## thr: array [0..4, TThread[tuple[a,b: int]]] ## L: TLock ## -## proc threadFunc(interval: tuple[a,b: int]) {.procvar.} = -## for i in interval.a..interval.b: +## proc threadFunc(interval: tuple[a,b: int]) {.thread.} = +## for i in interval.a..interval.b: ## Acquire(L) # lock stdout ## echo i ## Release(L) @@ -41,38 +41,13 @@ const maxRegisters = 256 # don't think there is an arch with more registers maxLocksPerThread* = 10 ## max number of locks a thread can hold ## at the same time + useStackMaskHack = false ## use the stack mask hack for better performance + StackGuardSize = 4096 + ThreadStackMask = 1024*256*sizeof(int)-1 + ThreadStackSize = ThreadStackMask+1 - StackGuardSize -when defined(Windows): - type - TSysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi - DebugInfo: pointer - LockCount: int32 - RecursionCount: int32 - OwningThread: int - LockSemaphore: int - Reserved: int32 - - proc InitSysLock(L: var TSysLock) {.stdcall, noSideEffect, - dynlib: "kernel32", importc: "InitializeCriticalSection".} - ## Initializes the lock `L`. - - proc TryAcquireSysAux(L: var TSysLock): int32 {.stdcall, noSideEffect, - dynlib: "kernel32", importc: "TryEnterCriticalSection".} - ## Tries to acquire the lock `L`. - - proc TryAcquireSys(L: var TSysLock): bool {.inline.} = - result = TryAcquireSysAux(L) != 0'i32 - - proc AcquireSys(L: var TSysLock) {.stdcall, noSideEffect, - dynlib: "kernel32", importc: "EnterCriticalSection".} - ## Acquires the lock `L`. - - proc ReleaseSys(L: var TSysLock) {.stdcall, noSideEffect, - dynlib: "kernel32", importc: "LeaveCriticalSection".} - ## Releases the lock `L`. 
- +when defined(windows): type - THandle = int TSysThread = THandle TWinThreadProc = proc (x: pointer): int32 {.stdcall.} @@ -95,9 +70,6 @@ when defined(Windows): dwMilliseconds: int32): int32 {. stdcall, dynlib: "kernel32", importc: "WaitForMultipleObjects".} - proc WaitForSingleObject(hHandle: TSysThread, dwMilliseconds: int32): int32 {. - stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".} - proc TerminateThread(hThread: TSysThread, dwExitCode: int32): int32 {. stdcall, dynlib: "kernel32", importc: "TerminateThread".} @@ -116,24 +88,6 @@ else: {.passC: "-pthread".} type - TSysLock {.importc: "pthread_mutex_t", pure, final, - header: "<sys/types.h>".} = object - - proc InitSysLock(L: var TSysLock, attr: pointer = nil) {. - importc: "pthread_mutex_init", header: "<pthread.h>", noSideEffect.} - - proc AcquireSys(L: var TSysLock) {.noSideEffect, - importc: "pthread_mutex_lock", header: "<pthread.h>".} - proc TryAcquireSysAux(L: var TSysLock): cint {.noSideEffect, - importc: "pthread_mutex_trylock", header: "<pthread.h>".} - - proc TryAcquireSys(L: var TSysLock): bool {.inline.} = - result = TryAcquireSysAux(L) == 0'i32 - - proc ReleaseSys(L: var TSysLock) {.noSideEffect, - importc: "pthread_mutex_unlock", header: "<pthread.h>".} - - type TSysThread {.importc: "pthread_t", header: "<sys/types.h>", final, pure.} = object Tpthread_attr {.importc: "pthread_attr_t", @@ -191,57 +145,71 @@ else: proc ThreadVarGetValue(s: TThreadVarSlot): pointer {.inline.} = result = pthread_getspecific(s) -const emulatedThreadVars = defined(macosx) + when useStackMaskHack: + proc pthread_attr_setstack(attr: var TPthread_attr, stackaddr: pointer, + size: int): cint {. 
+ importc: "pthread_attr_setstack", header: "<pthread.h>".} + +const + emulatedThreadVars = true when emulatedThreadVars: # the compiler generates this proc for us, so that we can get the size of - # the thread local var block: + # the thread local var block; we use this only for sanity checking though proc NimThreadVarsSize(): int {.noconv, importc: "NimThreadVarsSize".} -proc ThreadVarsAlloc(size: int): pointer = - result = c_malloc(size) - zeroMem(result, size) -proc ThreadVarsDealloc(p: pointer) {.importc: "free", nodecl.} - +# we preallocate a fixed size for thread local storage, so that no heap +# allocations are needed. Currently less than 7K are used on a 64bit machine. +# We use ``float`` for proper alignment: type + TThreadLocalStorage = array [0..1_000, float] + PGcThread = ptr TGcThread TGcThread {.pure.} = object sys: TSysThread next, prev: PGcThread - stackBottom, stackTop, threadLocalStorage: pointer + stackBottom, stackTop: pointer stackSize: int - locksLen: int - locks: array [0..MaxLocksPerThread-1, pointer] - registers: array[0..maxRegisters-1, pointer] # register contents for GC + inbox: TThreadLocalStorage + when emulatedThreadVars and not useStackMaskHack: + tls: TThreadLocalStorage + else: + nil # XXX it'd be more efficient to not use a global variable for the # thread storage slot, but to rely on the implementation to assign slot 0 # for us... 
;-) var globalsSlot = ThreadVarAlloc() #const globalsSlot = TThreadVarSlot(0) -#assert checkSlot.int == globalsSlot.int - -proc ThisThread(): PGcThread {.compilerRtl, inl.} = - result = cast[PGcThread](ThreadVarGetValue(globalsSlot)) +#sysAssert checkSlot.int == globalsSlot.int proc GetThreadLocalVars(): pointer {.compilerRtl, inl.} = - result = cast[PGcThread](ThreadVarGetValue(globalsSlot)).threadLocalStorage + result = addr(cast[PGcThread](ThreadVarGetValue(globalsSlot)).tls) + +when useStackMaskHack: + proc MaskStackPointer(offset: int): pointer {.compilerRtl, inl.} = + var x {.volatile.}: pointer + x = addr(x) + result = cast[pointer]((cast[int](x) and not ThreadStackMask) +% + (0) +% offset) # create for the main thread. Note: do not insert this data into the list # of all threads; it's not to be stopped etc. when not defined(useNimRtl): - var mainThread: TGcThread - - ThreadVarSetValue(globalsSlot, addr(mainThread)) - when emulatedThreadVars: - mainThread.threadLocalStorage = ThreadVarsAlloc(NimThreadVarsSize()) - - initStackBottom() - initGC() + when not useStackMaskHack: + var mainThread: TGcThread + ThreadVarSetValue(globalsSlot, addr(mainThread)) + initStackBottom() + initGC() var heapLock: TSysLock InitSysLock(HeapLock) + when emulatedThreadVars: + if NimThreadVarsSize() > sizeof(TThreadLocalStorage): + echo "too large thread local storage size requested" + quit 1 + var threadList: PGcThread @@ -251,11 +219,11 @@ when not defined(useNimRtl): t.prev = nil t.next = threadList if threadList != nil: - assert(threadList.prev == nil) + sysAssert(threadList.prev == nil) threadList.prev = t threadList = t ReleaseSys(HeapLock) - + proc unregisterThread(t: PGcThread) = # we need to use the GC global lock here! AcquireSys(HeapLock) @@ -270,9 +238,7 @@ when not defined(useNimRtl): # on UNIX, the GC uses ``SIGFREEZE`` to tell every thread to stop so that # the GC can examine the stacks? 
- - proc stopTheWord() = - nil + proc stopTheWord() = nil # We jump through some hops here to ensure that Nimrod thread procs can have # the Nimrod calling convention. This is needed because thread procs are @@ -286,26 +252,33 @@ type fn: proc (p: TParam) data: TParam +proc initInbox(p: pointer) +proc freeInbox(p: pointer) when not defined(boehmgc) and not hasSharedHeap: proc deallocOsPages() template ThreadProcWrapperBody(closure: expr) = ThreadVarSetValue(globalsSlot, closure) var t = cast[ptr TThread[TParam]](closure) - when emulatedThreadVars: - t.threadLocalStorage = ThreadVarsAlloc(NimThreadVarsSize()) + when useStackMaskHack: + var tls: TThreadLocalStorage when not defined(boehmgc) and not hasSharedHeap: # init the GC for this thread: setStackBottom(addr(t)) initGC() t.stackBottom = addr(t) registerThread(t) + initInbox(addr(t.inbox)) try: + when false: + var a = addr(tls) + var b = MaskStackPointer(1293920-372736-303104-36864) + c_fprintf(c_stdout, "TLS: %p\nmasked: %p\ndiff: %ld\n", + a, b, cast[int](a) - cast[int](b)) t.fn(t.data) finally: # XXX shut-down is not executed when the thread is forced down! - when emulatedThreadVars: - ThreadVarsDealloc(t.threadLocalStorage) + freeInbox(addr(t.inbox)) unregisterThread(t) when defined(deallocOsPages): deallocOsPages() @@ -330,7 +303,7 @@ proc joinThreads*[TParam](t: openArray[TThread[TParam]]) = ## waits for every thread in `t` to finish. when hostOS == "windows": var a: array[0..255, TSysThread] - assert a.len >= t.len + sysAssert a.len >= t.len for i in 0..t.high: a[i] = t[i].sys discard WaitForMultipleObjects(t.len, cast[ptr TSysThread](addr(a)), 1, -1) else: @@ -338,7 +311,7 @@ proc joinThreads*[TParam](t: openArray[TThread[TParam]]) = when false: # XXX a thread should really release its heap here somehow: - proc destroyThread*[TParam](t: var TThread[TParam]) {.inline.} = + proc destroyThread*[TParam](t: var TThread[TParam]) = ## forces the thread `t` to terminate. 
This is potentially dangerous if ## you don't have full control over `t` and its acquired resources. when hostOS == "windows": @@ -348,28 +321,32 @@ when false: unregisterThread(addr(t)) proc createThread*[TParam](t: var TThread[TParam], - tp: proc (param: TParam), - param: TParam, - stackSize = 1024*256*sizeof(int)) {. - magic: "CreateThread".} = + tp: proc (param: TParam) {.thread.}, + param: TParam) = ## creates a new thread `t` and starts its execution. Entry point is the ## proc `tp`. `param` is passed to `tp`. t.data = param t.fn = tp - t.stackSize = stackSize + t.stackSize = ThreadStackSize when hostOS == "windows": var dummyThreadId: int32 - t.sys = CreateThread(nil, stackSize, threadProcWrapper[TParam], + t.sys = CreateThread(nil, ThreadStackSize, threadProcWrapper[TParam], addr(t), 0'i32, dummyThreadId) if t.sys <= 0: raise newException(EResourceExhausted, "cannot create thread") else: var a: Tpthread_attr pthread_attr_init(a) - pthread_attr_setstacksize(a, stackSize) + pthread_attr_setstacksize(a, ThreadStackSize) if pthread_create(t.sys, a, threadProcWrapper[TParam], addr(t)) != 0: raise newException(EResourceExhausted, "cannot create thread") +when useStackMaskHack: + proc runMain(tp: proc (dummy: pointer) {.thread.}) {.compilerproc.} = + var mainThread: TThread[pointer] + createThread(mainThread, tp, nil) + joinThread(mainThread) + # --------------------------- lock handling ---------------------------------- type @@ -380,18 +357,20 @@ const when nodeadlocks: var - deadlocksPrevented* = 0 ## counts the number of times a + deadlocksPrevented*: int ## counts the number of times a ## deadlock has been prevented + locksLen {.threadvar.}: int + locks {.threadvar.}: array [0..MaxLocksPerThread-1, pointer] + + proc OrderedLocks(): bool = + for i in 0 .. locksLen-2: + if locks[i] >= locks[i+1]: return false + result = true proc InitLock*(lock: var TLock) {.inline.} = ## Initializes the lock `lock`. 
InitSysLock(lock) -proc OrderedLocks(g: PGcThread): bool = - for i in 0 .. g.locksLen-2: - if g.locks[i] >= g.locks[i+1]: return false - result = true - proc TryAcquire*(lock: var TLock): bool {.inline.} = ## Try to acquires the lock `lock`. Returns `true` on success. result = TryAcquireSys(lock) @@ -399,88 +378,93 @@ proc TryAcquire*(lock: var TLock): bool {.inline.} = if not result: return # we have to add it to the ordered list. Oh, and we might fail if # there is no space in the array left ... - var g = ThisThread() - if g.locksLen >= len(g.locks): + if locksLen >= len(locks): ReleaseSys(lock) raise newException(EResourceExhausted, "cannot acquire additional lock") # find the position to add: var p = addr(lock) - var L = g.locksLen-1 + var L = locksLen-1 var i = 0 while i <= L: - assert g.locks[i] != nil - if g.locks[i] < p: inc(i) # in correct order - elif g.locks[i] == p: return # thread already holds lock + sysAssert locks[i] != nil + if locks[i] < p: inc(i) # in correct order + elif locks[i] == p: return # thread already holds lock else: # do the crazy stuff here: while L >= i: - g.locks[L+1] = g.locks[L] + locks[L+1] = locks[L] dec L - g.locks[i] = p - inc(g.locksLen) - assert OrderedLocks(g) + locks[i] = p + inc(locksLen) + sysAssert OrderedLocks() return # simply add to the end: - g.locks[g.locksLen] = p - inc(g.locksLen) - assert OrderedLocks(g) + locks[locksLen] = p + inc(locksLen) + sysAssert OrderedLocks() proc Acquire*(lock: var TLock) = ## Acquires the lock `lock`. 
when nodeadlocks: - var g = ThisThread() var p = addr(lock) - var L = g.locksLen-1 + var L = locksLen-1 var i = 0 while i <= L: - assert g.locks[i] != nil - if g.locks[i] < p: inc(i) # in correct order - elif g.locks[i] == p: return # thread already holds lock + sysAssert locks[i] != nil + if locks[i] < p: inc(i) # in correct order + elif locks[i] == p: return # thread already holds lock else: # do the crazy stuff here: - if g.locksLen >= len(g.locks): + if locksLen >= len(locks): raise newException(EResourceExhausted, "cannot acquire additional lock") while L >= i: - ReleaseSys(cast[ptr TSysLock](g.locks[L])[]) - g.locks[L+1] = g.locks[L] + ReleaseSys(cast[ptr TSysLock](locks[L])[]) + locks[L+1] = locks[L] dec L # acquire the current lock: AcquireSys(lock) - g.locks[i] = p - inc(g.locksLen) + locks[i] = p + inc(locksLen) # acquire old locks in proper order again: - L = g.locksLen-1 + L = locksLen-1 inc i while i <= L: - AcquireSys(cast[ptr TSysLock](g.locks[i])[]) + AcquireSys(cast[ptr TSysLock](locks[i])[]) inc(i) # DANGER: We can only modify this global var if we gained every lock! # NO! We need an atomic increment. Crap. discard system.atomicInc(deadlocksPrevented, 1) - assert OrderedLocks(g) + sysAssert OrderedLocks() return # simply add to the end: - if g.locksLen >= len(g.locks): + if locksLen >= len(locks): raise newException(EResourceExhausted, "cannot acquire additional lock") AcquireSys(lock) - g.locks[g.locksLen] = p - inc(g.locksLen) - assert OrderedLocks(g) + locks[locksLen] = p + inc(locksLen) + sysAssert OrderedLocks() else: AcquireSys(lock) proc Release*(lock: var TLock) = ## Releases the lock `lock`. 
when nodeadlocks: - var g = ThisThread() var p = addr(lock) - var L = g.locksLen + var L = locksLen for i in countdown(L-1, 0): - if g.locks[i] == p: - for j in i..L-2: g.locks[j] = g.locks[j+1] - dec g.locksLen + if locks[i] == p: + for j in i..L-2: locks[j] = locks[j+1] + dec locksLen break ReleaseSys(lock) +# ------------------------ message passing support --------------------------- + +proc getInBoxMem*[TMsg](t: var TThread[TMsg]): pointer {.inline.} = + result = addr(t.inbox) + +proc getInBoxMem*(): pointer {.inline.} = + result = addr(cast[PGcThread](ThreadVarGetValue(globalsSlot)).inbox) + diff --git a/tests/accept/run/tnodeadlocks.nim b/tests/accept/run/tnodeadlocks.nim index eef60c594..3235e84ee 100755 --- a/tests/accept/run/tnodeadlocks.nim +++ b/tests/accept/run/tnodeadlocks.nim @@ -14,7 +14,7 @@ var proc doNothing() = nil -proc threadFunc(interval: tuple[a, b: int]) {.procvar.} = +proc threadFunc(interval: tuple[a, b: int]) {.thread.} = doNothing() for i in interval.a..interval.b: when nodeadlocks: diff --git a/todo.txt b/todo.txt index 3308309c7..710f9b8aa 100755 --- a/todo.txt +++ b/todo.txt @@ -1,7 +1,15 @@ High priority (version 0.8.12) ============================== -* test threads on windows; thread analysis needs to be even more restrictive! -* implement message passing built-ins: channels/queues +* test threads on windows +* test thread analysis: + var x = globalString # ok, copied; `x` is mine! + vs + var x = globalRef # read access, `x` is theirs! + +* test message passing built-ins +* make threadvar efficient again on linux after testing +* document Nimrod's threads +* document Nimrod's two phase symbol lookup for generics * bug: {:}.toTable[int, string]() @@ -11,6 +19,7 @@ version 0.9.0 - add --deadlock_prevention:on|off switch? timeout for locks? 
- bug: tfFinal not passed to generic - bug: forward proc for generic seems broken +- ``var T`` as a return type; easy to prove that location is not on the stack - test the sort implementation again - warning for implicit openArray -> varargs convention - implement explicit varargs @@ -74,7 +83,6 @@ Low priority - ``when T is int`` for generic code - ``when validCode( proc () )`` for generic code -- macros: ``typecheck`` pragma; this allows transformations based on types! - find a way for easy constructors and destructors; (destructors are much more important than constructors) - code generated for type information is wasteful diff --git a/web/news.txt b/web/news.txt index cb41dba7c..95f850f72 100755 --- a/web/news.txt +++ b/web/news.txt @@ -56,6 +56,7 @@ Additions - Added ``lists`` module which contains generic linked lists. - Added ``sets`` module which contains generic hash sets. - Added ``tables`` module which contains generic hash tables. +- Added ``queues`` module which contains generic sequence based queues. - Added ``intsets`` module which contains a specialized int set data type. - Added ``scgi`` module. - Added ``smtp`` module. diff --git a/web/nimrod.ini b/web/nimrod.ini index d50bfe453..b4d86df3b 100755 --- a/web/nimrod.ini +++ b/web/nimrod.ini @@ -39,7 +39,7 @@ srcdoc: "pure/xmlparser;pure/htmlparser;pure/xmltree;pure/colors" srcdoc: "pure/json;pure/base64;pure/scgi;pure/redis;impure/graphics" srcdoc: "impure/rdstdin;wrappers/zmq;wrappers/sphinx" srcdoc: "pure/collections/tables;pure/collections/sets;pure/collections/lists" -srcdoc: "pure/collections/intsets;pure/encodings" +srcdoc: "pure/collections/intsets;pure/collections/queues;pure/encodings" webdoc: "wrappers/libcurl;pure/md5;wrappers/mysql;wrappers/iup" webdoc: "wrappers/sqlite3;wrappers/postgres;wrappers/tinyc" |