preparations for proper memmap'ed files

author: Araq <rumpf_a@web.de> 2011-10-22 11:08:52 +0200
committer: Araq <rumpf_a@web.de> 2011-10-22 11:08:52 +0200
commit: 235bd1c47402a62db097a0bae7d63224b46d02d8 (patch)
tree: 4cf5b3808c785cb7487de8b80cb503fd2b5cbd54
parent: ded7f6a6d56eaeeee0e9788a0637012e53981475 (diff)
download: Nim-235bd1c47402a62db097a0bae7d63224b46d02d8.tar.gz
4 files changed, 105 insertions, 32 deletions
diff --git a/compiler/rodread.nim b/compiler/rodread.nim
index 6e5e1fb03..c884db6b1 100755
--- a/compiler/rodread.nim
+++ b/compiler/rodread.nim
@@ -68,12 +68,16 @@
 #    sym
 #    )
 #
+#    The data section MUST be the last section of the file, because processing
+#    stops immediately after ``DATA(`` and the rest is only loaded on demand
+#    by using a memory mapped file.
+#
 #  We now also do index compression, because an index always needs to be read.
 #
 
 import 
   os, options, strutils, nversion, ast, astalgo, msgs, platform, condsyms, 
-  ropes, idents, crc, idgen, rodutils
+  ropes, idents, crc, idgen, rodutils, memfiles
 
 type 
   TReasonForRecompile* = enum 
@@ -104,26 +108,28 @@ type
     offset*: int              # readers use this
   
   TRodReader* = object of TObject
-    pos*: int                 # position; used for parsing
-    s*: string                # the whole file in memory; XXX mmap this!
-    options*: TOptions
-    reason*: TReasonForRecompile
-    modDeps*: TStringSeq
-    files*: TStringSeq
-    dataIdx*: int             # offset of start of data section
-    convertersIdx*: int       # offset of start of converters section
-    initIdx*, interfIdx*, compilerProcsIdx*, cgenIdx*: int
-    filename*: string
-    index*, imports*: TIndex
-    readerIndex*: int
-    line*: int            # only used for debugging, but is always in the code
-    moduleID*: int
-    syms*: TIdTable           # already processed symbols
+    pos: int                 # position; used for parsing
+    s: cstring               # mmap'ed file contents
+    options: TOptions
+    reason: TReasonForRecompile
+    modDeps: TStringSeq
+    files: TStringSeq
+    dataIdx: int             # offset of start of data section
+    convertersIdx: int       # offset of start of converters section
+    initIdx, interfIdx, compilerProcsIdx: int
+    filename: string
+    index, imports: TIndex
+    readerIndex: int
+    line: int            # only used for debugging, but is always in the code
+    moduleID: int
+    syms: TIdTable       # already processed symbols
+    memfile: TMemFile    # unfortunately there is no point in time where we
+                         # can close this! XXX
   
   PRodReader* = ref TRodReader
 
 const 
-  FileVersion* = "1019"       # modify this if the rod-format changes!
+  FileVersion* = "1022"       # modify this if the rod-format changes!
 
 var rodCompilerprocs*: TStrTable
 
@@ -541,14 +547,11 @@ proc processRodFile(r: PRodReader, crc: TCrc32) =
     of "DATA": 
       r.dataIdx = r.pos + 2 # "(\10"
       # We do not read the DATA section here! We read the needed objects on
-      # demand.
-      skipSection(r)
+      # demand. And the DATA section comes last in the file, so we stop here:
+      break
     of "INIT": 
       r.initIdx = r.pos + 2   # "(\10"
       skipSection(r)
-    of "CGEN": 
-      r.cgenIdx = r.pos + 2
-      skipSection(r)
     else: 
       MsgWriteln("skipping section: " & $r.pos)
       skipSection(r)
@@ -556,6 +559,12 @@ proc processRodFile(r: PRodReader, crc: TCrc32) =
       inc(r.pos)
       inc(r.line)
 
+
+proc startsWith(buf: cstring, token: string, pos = 0): bool =
+  var s = 0  # number of leading characters of `token` matched so far
+  while s < token.len and buf[pos+s] == token[s]: inc s
+  result = s == token.len  # true iff all of `token` matched from `buf[pos]` on
+
 proc newRodReader(modfilename: string, crc: TCrc32, 
                   readerIndex: int): PRodReader = 
   new(result)
@@ -568,7 +577,10 @@ proc newRodReader(modfilename: string, crc: TCrc32,
   r.readerIndex = readerIndex
   r.filename = modfilename
   InitIdTable(r.syms)
-  r.s = readFile(modfilename)
+  if not open(r.memFile, modfilename): return nil
+  # we terminate the file explicitly with ``\0``, so the cast to `cstring`
+  # is safe:
+  r.s = cast[cstring](r.memFile.mem)
   if startsWith(r.s, "NIM:"): 
     initIITable(r.index.tab)
     initIITable(r.imports.tab) # looks like a ROD file
diff --git a/compiler/rodwrite.nim b/compiler/rodwrite.nim
index 30cbe2942..a27673237 100755
--- a/compiler/rodwrite.nim
+++ b/compiler/rodwrite.nim
@@ -282,15 +282,20 @@ proc encodeSym(w: PRodWriter, s: PSym, result: var string) =
     result.add('@')
     encodeVInt(ord(s.magic), result)
   if s.ast != nil: 
-    var codeAst: PNode = nil
-    if not astNeeded(s):
-      codeAst = s.ast.sons[codePos]
-      # ugly hack to not store the AST:
-      s.ast.sons[codePos] = ast.emptyNode
+    # we used to attempt to save space here by only storing a dummy AST if
+    # it is not necessary, but Nimrod's heavy compile-time evaluation features
+    # make that unfeasible nowadays:
     encodeNode(w, s.info, s.ast, result)
-    if codeAst != nil:
-      # resore the AST:
-      s.ast.sons[codePos] = codeAst
+    when false:
+      var codeAst: PNode = nil
+      if not astNeeded(s):
+        codeAst = s.ast.sons[codePos]
+        # ugly hack to not store the AST:
+        s.ast.sons[codePos] = ast.emptyNode
+      encodeNode(w, s.info, s.ast, result)
+      if codeAst != nil:
+        # restore the AST:
+        s.ast.sons[codePos] = codeAst
   if s.options != w.options: 
     result.add('!')
     encodeVInt(cast[int32](s.options), result)
@@ -458,6 +463,9 @@ proc writeRod(w: PRodWriter) =
   f.write("DATA(" & rodNL)
   f.write(w.data)
   f.write(')' & rodNL)
+  # write trailing zero which is necessary because we use memory mapped files
+  # for reading:
+  f.write("\0")
   f.close()
   
   #echo "interf: ", w.interf.len
diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim
new file mode 100644
index 000000000..dd95d8d24
--- /dev/null
+++ b/lib/pure/memfiles.nim
@@ -0,0 +1,51 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2011 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module provides support for `memory mapped files`:idx:
+## (Posix's `mmap`:idx:) on the different operating systems.
+## XXX Currently it is implemented with Nimrod's
+## basic IO facilities and does not use any platform specific code!
+## Oh and currently only ``fmRead`` is supported...
+
+type
+  TMemFile* = object {.pure.}  # state of one opened "memory mapped" file
+    file: TFile       # handle that `open` opens and `close` closes
+    buffer: pointer   # block allocated by `open`; the file is read into it
+    fileLen: int      # file size as reported by getFileSize in `open`
+  
+proc open*(f: var TMemFile, filename: string, mode: TFileMode = fmRead): bool =
+  ## Opens `filename` as memory mapped file `f`. Returns false if the file
+  ## cannot be opened; raises EIO if it is too big or cannot be read fully.
+  assert mode == fmRead   # only read access is implemented so far
+  result = open(f.file, filename, mode)
+  if not result: return   # nothing was opened, so there is nothing to read
+  var len = getFileSize(f.file)
+  if len >= high(int):
+    raise newException(EIO, "file too big to fit in memory")
+  f.fileLen = int(len)
+  f.buffer = alloc(f.fileLen)   # released again by `close`
+  if readBuffer(f.file, f.buffer, f.fileLen) != f.fileLen:
+    raise newException(EIO, "error while reading from file")
+
+proc close*(f: var TMemFile) =
+  ## closes the memory mapped file `f`: frees its buffer and closes the file.
+  ## Nothing is written back, as currently only ``fmRead`` is supported.
+  dealloc(f.buffer)
+  close(f.file)
+
+proc mem*(f: var TMemFile): pointer {.inline.} =
+  ## retrieves a pointer to the buffer holding the contents of `f`. The pointer
+  ## could be used directly to change the contents of the file, if `f` was
+  ## opened with write access (not implemented yet: only ``fmRead`` works).
+  result = f.buffer
+
+proc size*(f: var TMemFile): int {.inline.} =
+  ## retrieves the size of the memory mapped file `f` as recorded by `open`.
+  result = f.fileLen
+
diff --git a/todo.txt b/todo.txt
index ff27644ec..8b2655b16 100755
--- a/todo.txt
+++ b/todo.txt
@@ -11,7 +11,7 @@ Version 0.8.14
 - thread local vs. global raiseHook()
 - make pegs support a compile-time option and make c2nim use regexes instead
   per default
-
+- implement lib/pure/memfiles properly
 
 incremental compilation
 -----------------------
@@ -20,6 +20,8 @@ incremental compilation
 - write test cases: needs test script support
   - test thread var
   - test method generation
+  - test type converters
+  - test init sections
   - test DLL interfacing!
   - hallo.rod is missing initial statements: feature or bug?
 - fix remaining bugs
author	Araq <rumpf_a@web.de>	2011-10-22 11:08:52 +0200
committer	Araq <rumpf_a@web.de>	2011-10-22 11:08:52 +0200
commit	235bd1c47402a62db097a0bae7d63224b46d02d8 (patch)
tree	4cf5b3808c785cb7487de8b80cb503fd2b5cbd54
parent	ded7f6a6d56eaeeee0e9788a0637012e53981475 (diff)
download	Nim-235bd1c47402a62db097a0bae7d63224b46d02d8.tar.gz