# # # Nim's Runtime Library # (c) Copyright 2015 Nim Contributors # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## :Authors: Zahary Karadjov, Andreas Rumpf ## ## This module provides support for `memory mapped files`:idx: ## (Posix's `mmap`:idx:) on the different operating systems. ## ## It also provides some fast iterators over lines in text files (or ## other "line-like", variable length, delimited records). when defined(windows): import winlean elif defined(posix): import posix else: {.error: "the memfiles module is not supported on your operating system!".} import os, streams proc newEIO(msg: string): ref IOError = new(result) result.msg = msg type MemFile* = object ## represents a memory mapped file mem*: pointer ## a pointer to the memory mapped file. The pointer ## can be used directly to change the contents of the ## file, if it was opened with write access. size*: int ## size of the memory mapped file when defined(windows): fHandle*: Handle ## **Caution**: Windows specific public field to allow ## even more low level trickery. mapHandle*: Handle ## **Caution**: Windows specific public field. wasOpened*: bool ## **Caution**: Windows specific public field. else: handle*: cint ## **Caution**: Posix specific public field. flags: cint ## **Caution**: Platform specific private field. proc mapMem*(m: var MemFile, mode: FileMode = fmRead, mappedSize = -1, offset = 0, mapFlags = cint(-1)): pointer = ## returns a pointer to a mapped portion of MemFile `m` ## ## ``mappedSize`` of ``-1`` maps to the whole file, and ## ``offset`` must be multiples of the PAGE SIZE of your OS if mode == fmAppend: raise newEIO("The append mode is not supported.") var readonly = mode == fmRead when defined(windows): result = mapViewOfFileEx( m.mapHandle, if readonly: FILE_MAP_READ else: FILE_MAP_READ or FILE_MAP_WRITE, int32(offset shr 32), int32(offset and 0xffffffff), if mappedSize == -1: 0 else: mappedSize, nil) if result == nil: raiseOSError(osLastError()) else: assert mappedSize > 0 m.flags = if mapFlags == cint(-1): MAP_SHARED else: mapFlags #Ensure exactly one of MAP_PRIVATE cr MAP_SHARED is set if int(m.flags and MAP_PRIVATE) == 0: m.flags = m.flags or MAP_SHARED result = mmap( nil, mappedSize, if readonly: PROT_READ else: PROT_READ or PROT_WRITE, m.flags, m.handle, offset) if result == cast[pointer](MAP_FAILED): raiseOSError(osLastError()) proc unmapMem*(f: var MemFile, p: pointer, size: int) = ## unmaps the memory region ``(p, " .} let newAddr = mremap(f.mem, csize(f.size), csize(newFileSize), cint(1)) if newAddr == cast[pointer](MAP_FAILED): raiseOSError(osLastError()) else: if munmap(f.mem, f.size) != 0: raiseOSError(osLastError()) let newAddr = mmap(nil, newFileSize, PROT_READ or PROT_WRITE, f.flags, f.handle, 0) if newAddr == cast[pointer](MAP_FAILED): raiseOSError(osLastError()) f.mem = newAddr f.size = newFileSize proc close*(f: var MemFile) = ## closes the memory mapped file `f`. All changes are written back to the ## file system, if `f` was opened with write access. var error = false var lastErr: OSErrorCode when defined(windows): if f.wasOpened: error = unmapViewOfFile(f.mem) == 0 if not error: error = closeHandle(f.mapHandle) == 0 if not error and f.fHandle != INVALID_HANDLE_VALUE: discard closeHandle(f.fHandle) f.fHandle = INVALID_HANDLE_VALUE if error: lastErr = osLastError() else: error = munmap(f.mem, f.size) != 0 lastErr = osLastError() if f.handle != -1: error = (close(f.handle) != 0) or error f.size = 0 f.mem = nil when defined(windows): f.fHandle = 0 f.mapHandle = 0 f.wasOpened = false else: f.handle = -1 if error: raiseOSError(lastErr) type MemSlice* = object ## represent slice of a MemFile for iteration over delimited lines/records data*: pointer size*: int proc `==`*(x, y: MemSlice): bool = ## Compare a pair of MemSlice for strict equality. result = (x.size == y.size and equalMem(x.data, y.data, x.size)) proc `$`*(ms: MemSlice): string {.inline.} = ## Return a Nim string built from a MemSlice. result.setLen(ms.size) copyMem(addr(result[0]), ms.data, ms.size) iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} = ## Iterates over [optional `eat`] `delim`-delimited slices in MemFile `mfile`. ## ## Default parameters parse lines ending in either Unix(\\l) or Windows(\\r\\l) ## style on on a line-by-line basis. I.e., not every line needs the same ending. ## Unlike readLine(File) & lines(File), archaic MacOS9 \\r-delimited lines ## are not supported as a third option for each line. Such archaic MacOS9 ## files can be handled by passing delim='\\r', eat='\\0', though. ## ## Delimiters are not part of the returned slice. A final, unterminated line ## or record is returned just like any other. ## ## Non-default delimiters can be passed to allow iteration over other sorts ## of "line-like" variable length records. Pass eat='\\0' to be strictly ## `delim`-delimited. (Eating an optional prefix equal to '\\0' is not ## supported.) ## ## This zero copy, memchr-limited interface is probably the fastest way to ## iterate over line-like records in a file. However, returned (data,size) ## objects are not Nim strings, bounds checked Nim arrays, or even terminated ## C strings. So, care is required to access the data (e.g., think C mem* ## functions, not str* functions). ## ## Example: ## ## .. code-block:: nim ## var count = 0 ## for slice in memSlices(memfiles.open("foo")): ## if slice.size > 0 and cast[cstring](slice.data)[0] != '#': ## inc(count) ## echo count proc c_memchr(cstr: pointer, c: char, n: csize): pointer {. importc: "memchr", header: "" .} proc `-!`(p, q: pointer): int {.inline.} = return cast[int](p) -% cast[int](q) var ms: MemSlice var ending: pointer ms.data = mfile.mem var remaining = mfile.size while remaining > 0: ending = c_memchr(ms.data, delim, remaining) if ending == nil: # unterminated final slice ms.size = remaining # Weird case..check eat? yield ms break ms.size = ending -! ms.data # delim is NOT included if eat != '\0' and ms.size > 0 and cast[cstring](ms.data)[ms.size - 1] == eat: dec(ms.size) # trim pre-delim char yield ms ms.data = cast[pointer](cast[int](ending) +% 1) # skip delim remaining = mfile.size - (ms.data -! mfile.mem) iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): TaintedString {.inline.} = ## Replace contents of passed buffer with each new line, like ## `readLine(File) `_. ## `delim`, `eat`, and delimiting logic is exactly as for ## `memSlices <#memSlices>`_, but Nim strings are returned. ## ## Example: ## ## .. code-block:: nim ## var buffer: TaintedString = "" ## for line in lines(memfiles.open("foo"), buffer): ## echo line for ms in memSlices(mfile, delim, eat): setLen(buf.string, ms.size) if ms.size > 0: copyMem(addr buf[0], ms.data, ms.size) yield buf iterator lines*(mfile: MemFile, delim='\l', eat='\r'): TaintedString {.inline.} = ## Return each line in a file as a Nim string, like ## `lines(File) `_. ## `delim`, `eat`, and delimiting logic is exactly as for ## `memSlices <#memSlices>`_, but Nim strings are returned. ## ## Example: ## ## .. code-block:: nim ## for line in lines(memfiles.open("foo")): ## echo line var buf = TaintedString(newStringOfCap(80)) for line in lines(mfile, buf, delim, eat): yield buf type MemMapFileStream* = ref MemMapFileStreamObj ## a stream that encapsulates a `MemFile` MemMapFileStreamObj* = object of Stream mf: MemFile mode: FileMode pos: ByteAddress proc mmsClose(s: Stream) = MemMapFileStream(s).pos = -1 close(MemMapFileStream(s).mf) proc mmsFlush(s: Stream) = flush(MemMapFileStream(s).mf) proc mmsAtEnd(s: Stream): bool = (MemMapFileStream(s).pos >= MemMapFileStream(s).mf.size) or (MemMapFileStream(s).pos < 0) proc mmsSetPosition(s: Stream, pos: int) = if pos > MemMapFileStream(s).mf.size or pos < 0: raise newEIO("cannot set pos in stream") MemMapFileStream(s).pos = pos proc mmsGetPosition(s: Stream): int = MemMapFileStream(s).pos proc mmsPeekData(s: Stream, buffer: pointer, bufLen: int): int = let startAddress = cast[ByteAddress](MemMapFileStream(s).mf.mem) let p = cast[ByteAddress](MemMapFileStream(s).pos) let l = min(bufLen, MemMapFileStream(s).mf.size - p) moveMem(buffer, cast[pointer](startAddress + p), l) result = l proc mmsReadData(s: Stream, buffer: pointer, bufLen: int): int = result = mmsPeekData(s, buffer, bufLen) inc(MemMapFileStream(s).pos, result) proc mmsWriteData(s: Stream, buffer: pointer, bufLen: int) = if MemMapFileStream(s).mode == fmRead: raise newEIO("cannot write to read-only stream") let size = MemMapFileStream(s).mf.size if MemMapFileStream(s).pos + bufLen > size: raise newEIO("cannot write to stream") let p = cast[ByteAddress](MemMapFileStream(s).mf.mem) + cast[ByteAddress](MemMapFileStream(s).pos) moveMem(cast[pointer](p), buffer, bufLen) inc(MemMapFileStream(s).pos, bufLen) proc newMemMapFileStream*(filename: string, mode: FileMode = fmRead, fileSize: int = -1): MemMapFileStream = ## creates a new stream from the file named `filename` with the mode `mode`. ## Raises ## `OSError` if the file cannot be opened. See the `system ## `_ module for a list of available FileMode enums. ## ``fileSize`` can only be set if the file does not exist and is opened ## with write access (e.g., with fmReadWrite). var mf: MemFile = open(filename, mode, newFileSize = fileSize) new(result) result.mode = mode result.mf = mf result.closeImpl = mmsClose result.atEndImpl = mmsAtEnd result.setPositionImpl = mmsSetPosition result.getPositionImpl = mmsGetPosition result.readDataImpl = mmsReadData result.peekDataImpl = mmsPeekData result.writeDataImpl = mmsWriteData result.flushImpl = mmsFlush