diff options
-rw-r--r-- | .github/workflows/ci_docs.yml | 1 | ||||
-rw-r--r-- | compiler/msgs.nim | 18 | ||||
-rw-r--r-- | compiler/strutils2.nim | 57 | ||||
-rw-r--r-- | tests/stdlib/tstrutils2.nim | 32 |
4 files changed, 105 insertions, 3 deletions
diff --git a/.github/workflows/ci_docs.yml b/.github/workflows/ci_docs.yml index 2fbf7e7dc..816f560c9 100644 --- a/.github/workflows/ci_docs.yml +++ b/.github/workflows/ci_docs.yml @@ -24,6 +24,7 @@ on: - 'tools/kochdocs.nim' - '.github/workflows/ci_docs.yml' + jobs: build: strategy: diff --git a/compiler/msgs.nim b/compiler/msgs.nim index a0fc9b08d..e27eedd1c 100644 --- a/compiler/msgs.nim +++ b/compiler/msgs.nim @@ -11,6 +11,7 @@ import options, strutils, os, tables, ropes, terminal, macros, lineinfos, pathutils import std/private/miscdollars +import strutils2 type InstantiationInfo = typeof(instantiationInfo()) template instLoc(): InstantiationInfo = instantiationInfo(-2, fullPaths = true) @@ -66,6 +67,12 @@ when defined(nimpretty): proc fileSection*(conf: ConfigRef; fid: FileIndex; a, b: int): string = substr(conf.m.fileInfos[fid.int].fullContent, a, b) +proc canonicalCase(path: var string) = + ## ASCII-lowercases `path` in place unless the file system is case sensitive; + ## only for `filenameToIndexTbl` keys; stored paths keep their original case + when FileSystemCaseSensitive: discard + else: toLowerAscii(path) + proc fileInfoKnown*(conf: ConfigRef; filename: AbsoluteFile): bool = var canon: AbsoluteFile @@ -73,6 +80,7 @@ proc fileInfoKnown*(conf: ConfigRef; filename: AbsoluteFile): bool = canon = canonicalizePath(conf, filename) except OSError: canon = filename + canon.string.canonicalCase result = conf.m.filenameToIndexTbl.hasKey(canon.string) proc fileInfoIdx*(conf: ConfigRef; filename: AbsoluteFile; isKnownFile: var bool): FileIndex = @@ -89,15 +97,19 @@ proc fileInfoIdx*(conf: ConfigRef; filename: AbsoluteFile; isKnownFile: var bool # This flag indicates that we are working with such a path here pseudoPath = true - if conf.m.filenameToIndexTbl.hasKey(canon.string): + var canon2: string + forceCopy(canon2, canon.string) # because `canon` may be shallow + canon2.canonicalCase + + if conf.m.filenameToIndexTbl.hasKey(canon2): isKnownFile = true - result = 
conf.m.filenameToIndexTbl[canon.string] + result = conf.m.filenameToIndexTbl[canon2] else: isKnownFile = false result = conf.m.fileInfos.len.FileIndex conf.m.fileInfos.add(newFileInfo(canon, if pseudoPath: RelativeFile filename else: relativeTo(canon, conf.projectPath))) - conf.m.filenameToIndexTbl[canon.string] = result + conf.m.filenameToIndexTbl[canon2] = result proc fileInfoIdx*(conf: ConfigRef; filename: AbsoluteFile): FileIndex = var dummy: bool diff --git a/compiler/strutils2.nim b/compiler/strutils2.nim new file mode 100644 index 000000000..6cb50347f --- /dev/null +++ b/compiler/strutils2.nim @@ -0,0 +1,57 @@ +##[ +internal API for now, subject to modifications and moving around + +string APIs focusing on performance, that can be used as building blocks +for other routines. + +Unnecessary allocations are avoided and appropriate algorithms are used at the +expense of code clarity when justified. +]## + +proc dataPointer*[T](a: T): pointer = + ## same as C++ `data` that works with std::string, std::vector etc. + ## Note: safe to use when a.len == 0 but whether the result is nil or not + ## is implementation defined for performance reasons. + # this could be improved with compiler support to avoid the `if`, e.g. in C++ + # `&a[0]` is well defined even if a.size() == 0 + when T is string | seq: + if a.len == 0: nil else: cast[pointer](a[0].unsafeAddr) + elif T is array: + when a.len > 0: a.unsafeAddr + else: nil + elif T is cstring: + cast[pointer](a) + else: static: doAssert false, $T + +proc setLen*(result: var string, n: int, isInit: bool) = + ## when isInit = false, elements are left uninitialized, analog to `{.noinit.}` + ## else, they are 0-initialized. + # xxx placeholder until system.setLen supports this + # to distinguish between algorithms that need 0-initialization vs not; note + # that `setLen` for string is inconsistent with `setLen` for seq. + # likewise with `newString` vs `newSeq`. This should be fixed in `system`. 
+ let n0 = result.len + result.setLen(n) + if isInit and n > n0: + zeroMem(result[n0].addr, n - n0) + +proc forceCopy*(result: var string, a: string) = + ## also forces a copy if `a` is shallow + # the naive `result = a` would not work if `a` is shallow + let n = a.len + result.setLen n, isInit = false + copyMem(result.dataPointer, a.dataPointer, n) + +proc isUpperAscii(c: char): bool {.inline.} = + # avoids import strutils.isUpperAscii + c in {'A'..'Z'} + +proc toLowerAscii*(a: var string) = + ## optimized and in-place overload of strutils.toLowerAscii + # refs https://github.com/timotheecour/Nim/pull/54 + # this is 10X faster than a naive implementation, using an optimization trick + # that can be adapted in similar contexts. Predictable writes avoid write + # hazards and lead to better machine code, compared to random writes arising + # from: `if c.isUpperAscii: c = ...` + for c in mitems(a): + c = chr(c.ord + (if c.isUpperAscii: (ord('a') - ord('A')) else: 0)) diff --git a/tests/stdlib/tstrutils2.nim b/tests/stdlib/tstrutils2.nim new file mode 100644 index 000000000..881817f90 --- /dev/null +++ b/tests/stdlib/tstrutils2.nim @@ -0,0 +1,32 @@ +import "$lib/.." / compiler/strutils2 + +block: # setLen + var a = "abc" + a.setLen 0 + a.setLen 3, isInit = false + doAssert a[1] == 'b' + a.setLen 0 + a.setLen 3, isInit = true + doAssert a[1] == '\0' + +block: # forceCopy + var a: string + a = "foo" + shallow(a) + var b: string + b = a + doAssert b[0].addr == a[0].addr + var c: string + c.forceCopy a + doAssert c == a + doAssert c[0].addr != a[0].addr + +block: # toLowerAscii + var a = "fooBAr" + a.toLowerAscii + doAssert a == "foobar" + +block: # dataPointer + var a: string + discard a.dataPointer + # doAssert a.dataPointer == nil # not guaranteed |