4 files changed, 105 insertions, 3 deletions
diff --git a/.github/workflows/ci_docs.yml b/.github/workflows/ci_docs.yml
index 2fbf7e7dc..816f560c9 100644
--- a/.github/workflows/ci_docs.yml
+++ b/.github/workflows/ci_docs.yml
@@ -24,6 +24,7 @@ on:
       - 'tools/kochdocs.nim'
       - '.github/workflows/ci_docs.yml'
 
+
 jobs:
   build:
     strategy:
diff --git a/compiler/msgs.nim b/compiler/msgs.nim
index a0fc9b08d..e27eedd1c 100644
--- a/compiler/msgs.nim
+++ b/compiler/msgs.nim
@@ -11,6 +11,7 @@ import
   options, strutils, os, tables, ropes, terminal, macros,
   lineinfos, pathutils
 import std/private/miscdollars
+import strutils2
 
 type InstantiationInfo = typeof(instantiationInfo())
 template instLoc(): InstantiationInfo = instantiationInfo(-2, fullPaths = true)
@@ -66,6 +67,12 @@ when defined(nimpretty):
   proc fileSection*(conf: ConfigRef; fid: FileIndex; a, b: int): string =
     substr(conf.m.fileInfos[fid.int].fullContent, a, b)
 
+proc canonicalCase(path: var string) =
+  ## Lowercases `path` in place when the file system is case insensitive.
+  ## Only meant for table-lookup keys; the original case is kept otherwise.
+  when FileSystemCaseSensitive: discard
+  else: toLowerAscii(path)
+
 proc fileInfoKnown*(conf: ConfigRef; filename: AbsoluteFile): bool =
   var
     canon: AbsoluteFile
@@ -73,6 +80,7 @@ proc fileInfoKnown*(conf: ConfigRef; filename: AbsoluteFile): bool =
     canon = canonicalizePath(conf, filename)
   except OSError:
     canon = filename
+  canon.string.canonicalCase
   result = conf.m.filenameToIndexTbl.hasKey(canon.string)
 
 proc fileInfoIdx*(conf: ConfigRef; filename: AbsoluteFile; isKnownFile: var bool): FileIndex =
@@ -89,15 +97,19 @@ proc fileInfoIdx*(conf: ConfigRef; filename: AbsoluteFile; isKnownFile: var bool
     # This flag indicates that we are working with such a path here
     pseudoPath = true
 
-  if conf.m.filenameToIndexTbl.hasKey(canon.string):
+  var canon2: string
+  forceCopy(canon2, canon.string) # because `canon` may be shallow
+  canon2.canonicalCase
+
+  if conf.m.filenameToIndexTbl.hasKey(canon2):
     isKnownFile = true
-    result = conf.m.filenameToIndexTbl[canon.string]
+    result = conf.m.filenameToIndexTbl[canon2]
   else:
     isKnownFile = false
     result = conf.m.fileInfos.len.FileIndex
     conf.m.fileInfos.add(newFileInfo(canon, if pseudoPath: RelativeFile filename
                                             else: relativeTo(canon, conf.projectPath)))
-    conf.m.filenameToIndexTbl[canon.string] = result
+    conf.m.filenameToIndexTbl[canon2] = result
 
 proc fileInfoIdx*(conf: ConfigRef; filename: AbsoluteFile): FileIndex =
   var dummy: bool
diff --git a/compiler/strutils2.nim b/compiler/strutils2.nim
new file mode 100644
index 000000000..6cb50347f
--- /dev/null
+++ b/compiler/strutils2.nim
@@ -0,0 +1,57 @@
+##[
+internal API for now, subject to modifications and moving around
+
+string API's focusing on performance, that can be used as building blocks
+for other routines.
+
+Unnecessary allocations are avoided and appropriate algorithms are used at the
+expense of code clarity when justified.
+]##
+
+proc dataPointer*[T](a: T): pointer =
+  ## Untyped pointer to the first element of `a` (string, seq, array, cstring),
+  ## like C++ `data()`. Safe to use when a.len == 0, but whether the result is
+  ## nil or not is implementation defined for performance reasons.
+  # this could be improved with compiler support to avoid the `if`, e.g. in C++
+  # `&a[0]` is well defined even if a.size() == 0
+  when T is string | seq:
+    if a.len == 0: nil else: cast[pointer](a[0].unsafeAddr)
+  elif T is array:
+    when a.len > 0: a.unsafeAddr
+    else: nil
+  elif T is cstring:
+    cast[pointer](a)
+  else: static: doAssert false, $T
+
+proc setLen*(result: var string, n: int, isInit: bool) =
+  ## Resizes `result` to `n`. When isInit = false, new elements are left
+  ## uninitialized (analogous to `{.noinit.}`); else they are 0-initialized.
+  # xxx placeholder until system.setLen supports this
+  # to distinguish between algorithms that need 0-initialization vs not; note
+  # that `setLen` for string is inconsistent with `setLen` for seq.
+  # likewise with `newString` vs `newSeq`. This should be fixed in `system`.
+  let n0 = result.len
+  result.setLen(n)
+  if isInit and n > n0:
+    zeroMem(result[n0].addr, n - n0)
+
+proc forceCopy*(result: var string, a: string) =
+  ## Overwrites `result` with a byte copy of `a`; copies even if `a` is shallow
+  # (the naive `result = a` would not). `n > 0` keeps nil away from copyMem.
+  let n = a.len
+  result.setLen n, isInit = false
+  if n > 0: copyMem(result.dataPointer, a.dataPointer, n)
+
+proc isUpperAscii(c: char): bool {.inline.} =
+  # true for 'A'..'Z'; local copy avoids importing strutils.isUpperAscii
+  c in {'A'..'Z'}
+
+proc toLowerAscii*(a: var string) =
+  ## optimized and in-place overload of strutils.toLowerAscii
+  # refs https://github.com/timotheecour/Nim/pull/54
+  # this is 10X faster than a naive implementation using an optimization trick
+  # that can be adapted in similar contexts. Predictable writes avoid write
+  # hazards and lead to better machine code, compared to random writes arising
+  # from: `if c.isUpperAscii: c = ...`
+  for c in mitems(a):
+    c = chr(c.ord + (if c.isUpperAscii: (ord('a') - ord('A')) else: 0))
diff --git a/tests/stdlib/tstrutils2.nim b/tests/stdlib/tstrutils2.nim
new file mode 100644
index 000000000..881817f90
--- /dev/null
+++ b/tests/stdlib/tstrutils2.nim
@@ -0,0 +1,32 @@
+import "$lib/.." / compiler/strutils2
+
+block: # setLen: `isInit` decides whether regrown bytes are zeroed
+  var a = "abc"
+  a.setLen 0
+  a.setLen 3, isInit = false # regrow without zeroing
+  doAssert a[1] == 'b' # old content is still visible
+  a.setLen 0
+  a.setLen 3, isInit = true # regrow with zeroing
+  doAssert a[1] == '\0'
+
+block: # forceCopy: must produce a distinct buffer even for a shallow source
+  var a: string
+  a = "foo"
+  shallow(a)
+  var b: string
+  b = a # plain assignment from a shallow string
+  doAssert b[0].addr == a[0].addr # ... shares the buffer
+  var c: string
+  c.forceCopy a
+  doAssert c == a # same content
+  doAssert c[0].addr != a[0].addr # but separate storage
+
+block: # toLowerAscii: in-place, only upper case letters change
+  var a = "fooBAr"
+  a.toLowerAscii
+  doAssert a == "foobar"
+
+block: # dataPointer: must be callable on an empty string
+  var a: string
+  discard a.dataPointer # only checks that the call itself is safe
+  # doAssert a.dataPointer == nil # not guaranteed