#
#
#            Nim's Runtime Library
#        (c) Copyright 2015 Nim Contributors
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## :Authors: Zahary Karadjov, Andreas Rumpf
##
## This module provides support for `memory mapped files`:idx:
## (Posix's `mmap`:idx:) on the different operating systems.
##
## It also provides some fast iterators over lines in text files (or
## other "line-like", variable length, delimited records).

when defined(windows):
  import winlean
elif defined(posix):
  import posix
else:
  {.error: "the memfiles module is not supported on your operating system!".}

import os

type
  MemFile* = object  ## represents a memory mapped file
    mem*: pointer    ## a pointer to the memory mapped file. The pointer
                     ## can be used directly to change the contents of the
                     ## file, if it was opened with write access.
    size*: int       ## size of the memory mapped file

    when defined(windows):
      fHandle: Handle
      mapHandle: Handle
      wasOpened: bool   ## only close if wasOpened
    else:
      handle: cint

{.deprecated: [TMemFile: MemFile].}

proc mapMem*(m: var MemFile, mode: FileMode = fmRead,
             mappedSize = -1, offset = 0): pointer =
  ## Maps `mappedSize` bytes of the file behind `m`, starting at `offset`,
  ## and returns a pointer to the mapped region.
  ##
  ## With ``mode == fmRead`` the region is mapped read-only, otherwise it is
  ## mapped writable. On Windows a `mappedSize` of -1 is passed to the OS
  ## as 0; on the other platforms `mappedSize` must be greater than zero
  ## (checked with an ``assert``).
  ##
  ## Raises an OS error (via ``raiseOSError``) if the mapping fails.
  let readonly = mode == fmRead
  when defined(windows):
    result = mapViewOfFileEx(
      m.mapHandle,
      if readonly: FILE_MAP_READ else: FILE_MAP_WRITE,
      int32(offset shr 32),          # high 32 bits of the offset
      int32(offset and 0xffffffff),  # low 32 bits of the offset
      if mappedSize == -1: 0 else: mappedSize,
      nil)
    if result == nil:
      raiseOSError(osLastError())
  else:
    assert mappedSize > 0
    result = mmap(
      nil,
      mappedSize,
      if readonly: PROT_READ else: PROT_READ or PROT_WRITE,
      if readonly: (MAP_PRIVATE or MAP_POPULATE) else: (MAP_SHARED or MAP_POPULATE),
      m.handle, offset)
    if result == cast[pointer](MAP_FAILED):
      raiseOSError(osLastError())

proc unmapMem*(f: var MemFile, p: pointer, size: int) =
  ## unmaps the memory region ``(p, ".}
  # NOTE(review): the source text is damaged at this point. The rest of this
  # doc comment, the body of `unmapMem` and every definition that originally
  # stood between it and `$` below are missing (`$` relies on `MemSlice` and
  # `c_memcpy`, neither of which is defined in the text that survives).
  # Restore the missing section from version control before building.

proc `$`*(ms: MemSlice): string {.inline.} =
  ## Return a Nim string built from a MemSlice. An empty slice yields "".
  result = newString(ms.size)
  # Only touch result[0] when it exists: indexing an empty string is out of
  # bounds. No explicit '\0' is written at index `ms.size` either; that
  # position is past the end of the string and the runtime keeps strings
  # zero-terminated on its own.
  if ms.size > 0:
    c_memcpy(addr(result[0]), ms.data, ms.size)

iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} =
  ## Iterates over [optional `eat`] `delim`-delimited slices in MemFile `mfile`.
  ##
  ## Default parameters parse lines ending in either Unix(\\l) or Windows(\\r\\l)
  ## style on a line-by-line basis.  I.e., not every line needs the same ending.
  ## Unlike readLine(File) & lines(File), archaic MacOS9 \\r-delimited lines
  ## are not supported as a third option for each line.  Such archaic MacOS9
  ## files can be handled by passing delim='\\r', eat='\\0', though.
  ##
  ## Delimiters are not part of the returned slice.  A final, unterminated line
  ## or record is returned just like any other.
  ##
  ## Non-default delimiters can be passed to allow iteration over other sorts
  ## of "line-like" variable length records.  Pass eat='\\0' to be strictly
  ## `delim`-delimited.  (Eating an optional prefix equal to '\\0' is not
  ## supported.)
  ##
  ## This zero copy, memchr-limited interface is probably the fastest way to
  ## iterate over line-like records in a file.  However, returned (data,size)
  ## objects are not Nim strings, bounds checked Nim arrays, or even terminated
  ## C strings.  So, care is required to access the data (e.g., think C mem*
  ## functions, not str* functions).  Example:
  ##
  ## .. code-block:: nim
  ##   var count = 0
  ##   for slice in memSlices(memfiles.open("foo")):
  ##     if slice.size > 0 and cast[cstring](slice.data)[0] != '#':
  ##       inc(count)
  ##   echo count

  # memchr is declared in the C header <string.h>.
  proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
       importc: "memchr", header: "<string.h>".}
  # Distance in bytes from `q` to `p`.
  proc `-!`(p, q: pointer): int {.inline.} = return cast[int](p) -% cast[int](q)
  var ms: MemSlice
  var ending: pointer
  ms.data = mfile.mem
  var remaining = mfile.size
  while remaining > 0:
    ending = c_memchr(ms.data, delim, remaining)
    if ending == nil:             # unterminated final slice
      ms.size = remaining         # Weird case..check eat?
      yield ms
      break
    ms.size = ending -! ms.data   # delim is NOT included
    if eat != '\0' and ms.size > 0 and
        cast[cstring](ms.data)[ms.size - 1] == eat:
      dec(ms.size)                # trim pre-delim char
    yield ms
    ms.data = cast[pointer](cast[int](ending) +% 1)   # skip delim
    remaining = mfile.size - (ms.data -! mfile.mem)

iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): TaintedString {.inline.} =
  ## Replace contents of passed buffer with each new line, like
  ## ``readLine(File)``.
  ## `delim`, `eat`, and delimiting logic is exactly as for
  ## `memSlices <#memSlices>`_, but Nim strings are returned.  Example:
  ##
  ## .. code-block:: nim
  ##   var buffer: TaintedString = ""
  ##   for line in lines(memfiles.open("foo"), buffer):
  ##     echo line

  for ms in memSlices(mfile, delim, eat):
    buf.setLen(ms.size)
    # An empty line leaves `buf` empty, so there is no buf[0] to take the
    # address of; skip the copy in that case. `setLen` already leaves the
    # string properly terminated, so nothing is written at index `ms.size`.
    if ms.size > 0:
      c_memcpy(addr(buf[0]), ms.data, ms.size)
    yield buf

iterator lines*(mfile: MemFile, delim='\l', eat='\r'): TaintedString {.inline.} =
  ## Return each line in a file as a Nim string, like
  ## ``lines(File)``.
  ## `delim`, `eat`, and delimiting logic is exactly as for
  ## `memSlices <#memSlices>`_, but Nim strings are returned.  Example:
  ##
  ## .. code-block:: nim
  ##   for line in lines(memfiles.open("foo")):
  ##     echo line

  # One buffer is reused for every line; 80 is only the initial capacity.
  var buf = TaintedString(newStringOfCap(80))
  for _ in lines(mfile, buf, delim, eat):
    yield buf