diff options
author | Araq <rumpf_a@web.de> | 2011-10-22 11:08:52 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2011-10-22 11:08:52 +0200 |
commit | 235bd1c47402a62db097a0bae7d63224b46d02d8 (patch) | |
tree | 4cf5b3808c785cb7487de8b80cb503fd2b5cbd54 | |
parent | ded7f6a6d56eaeeee0e9788a0637012e53981475 (diff) | |
download | Nim-235bd1c47402a62db097a0bae7d63224b46d02d8.tar.gz |
preparations for proper memmap'ed files
-rwxr-xr-x | compiler/rodread.nim | 58 | ||||
-rwxr-xr-x | compiler/rodwrite.nim | 24 | ||||
-rw-r--r-- | lib/pure/memfiles.nim | 51 | ||||
-rwxr-xr-x | todo.txt | 4 |
4 files changed, 105 insertions, 32 deletions
diff --git a/compiler/rodread.nim b/compiler/rodread.nim index 6e5e1fb03..c884db6b1 100755 --- a/compiler/rodread.nim +++ b/compiler/rodread.nim @@ -68,12 +68,16 @@ # sym # ) # +# The data section MUST be the last section of the file, because processing +# stops immediately after ``DATA(`` and the rest is only loaded on demand +# by using mem'mapped a file. +# # We now also do index compression, because an index always needs to be read. # import os, options, strutils, nversion, ast, astalgo, msgs, platform, condsyms, - ropes, idents, crc, idgen, rodutils + ropes, idents, crc, idgen, rodutils, memfiles type TReasonForRecompile* = enum @@ -104,26 +108,28 @@ type offset*: int # readers use this TRodReader* = object of TObject - pos*: int # position; used for parsing - s*: string # the whole file in memory; XXX mmap this! - options*: TOptions - reason*: TReasonForRecompile - modDeps*: TStringSeq - files*: TStringSeq - dataIdx*: int # offset of start of data section - convertersIdx*: int # offset of start of converters section - initIdx*, interfIdx*, compilerProcsIdx*, cgenIdx*: int - filename*: string - index*, imports*: TIndex - readerIndex*: int - line*: int # only used for debugging, but is always in the code - moduleID*: int - syms*: TIdTable # already processed symbols + pos: int # position; used for parsing + s: cstring # mmap'ed file contents + options: TOptions + reason: TReasonForRecompile + modDeps: TStringSeq + files: TStringSeq + dataIdx: int # offset of start of data section + convertersIdx: int # offset of start of converters section + initIdx, interfIdx, compilerProcsIdx: int + filename: string + index, imports: TIndex + readerIndex: int + line: int # only used for debugging, but is always in the code + moduleID: int + syms: TIdTable # already processed symbols + memfile: TMemFile # unfortunately there is no point in time where we + # can close this! XXX PRodReader* = ref TRodReader const - FileVersion* = "1019" # modify this if the rod-format changes! + FileVersion* = "1022" # modify this if the rod-format changes! var rodCompilerprocs*: TStrTable @@ -541,14 +547,11 @@ proc processRodFile(r: PRodReader, crc: TCrc32) = of "DATA": r.dataIdx = r.pos + 2 # "(\10" # We do not read the DATA section here! We read the needed objects on - # demand. - skipSection(r) + # demand. And the DATA section comes last in the file, so we stop here: + break of "INIT": r.initIdx = r.pos + 2 # "(\10" skipSection(r) - of "CGEN": - r.cgenIdx = r.pos + 2 - skipSection(r) else: MsgWriteln("skipping section: " & $r.pos) skipSection(r) @@ -556,6 +559,12 @@ proc processRodFile(r: PRodReader, crc: TCrc32) = inc(r.pos) inc(r.line) + +proc startsWith(buf: cstring, token: string, pos = 0): bool = + var s = 0 + while s < token.len and buf[pos+s] == token[s]: inc s + result = s == token.len + proc newRodReader(modfilename: string, crc: TCrc32, readerIndex: int): PRodReader = new(result) @@ -568,7 +577,10 @@ proc newRodReader(modfilename: string, crc: TCrc32, r.readerIndex = readerIndex r.filename = modfilename InitIdTable(r.syms) - r.s = readFile(modfilename) + if not open(r.memFile, modfilename): return nil + # we terminate the file explicitely with ``\0``, so the cast to `cstring` + # is save: + r.s = cast[cstring](r.memFile.mem) if startsWith(r.s, "NIM:"): initIITable(r.index.tab) initIITable(r.imports.tab) # looks like a ROD file diff --git a/compiler/rodwrite.nim b/compiler/rodwrite.nim index 30cbe2942..a27673237 100755 --- a/compiler/rodwrite.nim +++ b/compiler/rodwrite.nim @@ -282,15 +282,20 @@ proc encodeSym(w: PRodWriter, s: PSym, result: var string) = result.add('@') encodeVInt(ord(s.magic), result) if s.ast != nil: - var codeAst: PNode = nil - if not astNeeded(s): - codeAst = s.ast.sons[codePos] - # ugly hack to not store the AST: - s.ast.sons[codePos] = ast.emptyNode + # we used to attempt to save space here by only storing a dummy AST if + # it is not necessary, but Nimrod's heavy compile-time evaluation features + # make that unfeasible nowadays: encodeNode(w, s.info, s.ast, result) - if codeAst != nil: - # resore the AST: - s.ast.sons[codePos] = codeAst + when false: + var codeAst: PNode = nil + if not astNeeded(s): + codeAst = s.ast.sons[codePos] + # ugly hack to not store the AST: + s.ast.sons[codePos] = ast.emptyNode + encodeNode(w, s.info, s.ast, result) + if codeAst != nil: + # resore the AST: + s.ast.sons[codePos] = codeAst if s.options != w.options: result.add('!') encodeVInt(cast[int32](s.options), result) @@ -458,6 +463,9 @@ proc writeRod(w: PRodWriter) = f.write("DATA(" & rodNL) f.write(w.data) f.write(')' & rodNL) + # write trailing zero which is necessary because we use memory mapped files + # for reading: + f.write("\0") f.close() #echo "interf: ", w.interf.len diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim new file mode 100644 index 000000000..dd95d8d24 --- /dev/null +++ b/lib/pure/memfiles.nim @@ -0,0 +1,51 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module provides support for `memory mapped files`:idx: +## (Posix's `mmap`:idx:) on the different operating systems. +## XXX Currently it is implemented with Nimrod's +## basic IO facilities and does not use any platform specific code! +## Oh and currently only ``fmRead`` is supported... + +type + TMemFile* = object {.pure.} + file: TFile + buffer: pointer + fileLen: int + +proc open*(f: var TMemFile, filename: string, mode: TFileMode = fmRead): bool = + ## open a memory mapped file `f`. Returns true for success. + assert mode == fmRead + result = open(f.file, filename, mode) + + var len = getFileSize(f.file) + if len < high(int): + f.fileLen = int(len) + f.buffer = alloc(f.fileLen) + if readBuffer(f.file, f.buffer, f.fileLen) != f.fileLen: + raise newException(EIO, "error while reading from file") + else: + raise newException(EIO, "file too big to fit in memory") + +proc close*(f: var TMemFile) = + ## closes the memory mapped file `f`. All changes are written back to the + ## file system, if `f` was opened with write access. + dealloc(f.buffer) + close(f.file) + +proc mem*(f: var TMemFile): pointer {.inline.} = + ## retrives a pointer to the memory mapped file `f`. The pointer can be + ## used directly to change the contents of the file, if `f` was opened + ## with write access. + result = f.buffer + +proc size*(f: var TMemFile): int {.inline.} = + ## retrives the size of the memory mapped file `f`. + result = f.fileLen + diff --git a/todo.txt b/todo.txt index ff27644ec..8b2655b16 100755 --- a/todo.txt +++ b/todo.txt @@ -11,7 +11,7 @@ Version 0.8.14 - thread local vs. global raiseHook() - make pegs support a compile-time option and make c2nim use regexes instead per default - +- implement lib/pure/memfiles properly incremental compilation ----------------------- @@ -20,6 +20,8 @@ incremental compilation - write test cases: needs test script support - test thread var - test method generation + - test type converters + - test init sections - test DLL interfacing! - hallo.rod is missing initial statements: feature or bug? - fix remaining bugs |