summary refs log tree commit diff stats
path: root/lib/std/private
diff options
context:
space:
mode:
authorhavardjohn <havard.mjaavatten@outlook.com>2022-08-20 10:30:11 +0200
committerGitHub <noreply@github.com>2022-08-20 04:30:11 -0400
commitf4bbf3bf0b5188352ee996831f921150760fb112 (patch)
tree8916efd708428672c22197f4170f0b5b60f601cf /lib/std/private
parent641381e3d47afba95f99efc77bb9a5ed65d07b3a (diff)
downloadNim-f4bbf3bf0b5188352ee996831f921150760fb112.tar.gz
Add use of Windows Wide CRT API for env. vars (#20084)
* Add use of Windows Wide CRT API for env. vars

Replaces use of CRT API `getenv` and `putenv` with respectively
`_wgetenv` and `_wputenv`. Motivation is to reliably convert environment
variables to UTF-8, and the wide API is best there, because it's
reliably UTF-16.

Changed the hack in `lib/std/private/win_setenv.nim` by switching the
order of the Unicode and MBCS environment update; Unicode first, MBCS
second. Because `_wgetenv`/`_wputenv` is now used, the Unicode
environment will be initialized, so it should always be updated.

Stop updating MBCS environment with the name of `getEnv`. It's not
necessarily true that MBCS encoding and the `string` encoding is the
same. Instead convert UTF-16 to current Windows code page with
`wcstombs`, and use that string to update MBCS.

Fixes regression in `6b3c77e` that caused `std/envvars.getEnv` or
`std/os.getEnv` on Windows to return non-UTF-8 encoded strings.

Add tests that test environment variables with Unicode characters in
their name or value.

* Fix test issues

Fixes

* `nim cpp` didn't compile the tests
* Nimscript import of `tosenv.nim` from `test_nimscript.nims` failed
  with "cannot importc"

* Fix missing error check on `wcstombs`

* Fix ANSI testing errors

* Separate ANSI-related testing to their own tests, and only executing
  them if running process has a specific code page
  * Setting locale with `setlocale` was not reliable and didn't work on
    certain machines
* Add handling of a "no character representation" error in second
  `wcstombs` call

* tests/newruntime_misc: Increment allocCount

Increments overall allocations in `tnewruntime_misc` test. This is
because `getEnv` now does an additional allocation: allocation of the
UTF-16 string used as parameter to `c_wgetenv`.

* Revert "tests/newruntime_misc: Increment allocCount"

This reverts commit 4d4fe8bd3edb1bfc6d600f247af797c7552f5477.

* tests/newruntime_misc: Increment allocCount on Windows

Increments overall allocations in `tnewruntime_misc` test for Windows.
This is because `getEnv` on Windows now does an additional allocation:
allocation of the UTF-16 string used as parameter to `c_wgetenv`.

* Refactor, adding suggestions from code review

Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>

* Document, adding suggestions

Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>

Co-authored-by: ringabout <43030857+ringabout@users.noreply.github.com>
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
Diffstat (limited to 'lib/std/private')
-rw-r--r--lib/std/private/win_setenv.nim65
1 files changed, 36 insertions, 29 deletions
diff --git a/lib/std/private/win_setenv.nim b/lib/std/private/win_setenv.nim
index 0dfe0ed46..89bb0421f 100644
--- a/lib/std/private/win_setenv.nim
+++ b/lib/std/private/win_setenv.nim
@@ -25,27 +25,30 @@ when not defined(windows): discard
 else:
   type wchar_t  {.importc: "wchar_t".} = int16
 
-  proc setEnvironmentVariableA*(lpName, lpValue: cstring): int32 {.
-    stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableA", sideEffect.}
+  proc setEnvironmentVariableW*(lpName, lpValue: WideCString): int32 {.
+    stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableW", sideEffect.}
     # same as winlean.setEnvironmentVariableA
 
-  proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
-  proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
-  proc c_wgetenv(varname: ptr wchar_t): ptr wchar_t {.importc: "_wgetenv", header: "<stdlib.h>".}
+  proc c_getenv(varname: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
+  proc c_wputenv(envstring: WideCString): cint {.importc: "_wputenv", header: "<stdlib.h>".}
+  proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
 
   var errno {.importc, header: "<errno.h>".}: cint
-  var gWenviron {.importc: "_wenviron".}: ptr ptr wchar_t
+  var genviron {.importc: "_environ".}: ptr ptr char
     # xxx `ptr UncheckedArray[WideCString]` did not work
 
-  proc mbstowcs(wcstr: ptr wchar_t, mbstr: cstring, count: csize_t): csize_t {.importc: "mbstowcs", header: "<stdlib.h>".}
+  proc wcstombs(wcstr: ptr char, mbstr: WideCString, count: csize_t): csize_t {.importc, header: "<stdlib.h>".}
     # xxx cint vs errno_t?
 
   proc setEnvImpl*(name: string, value: string, overwrite: cint): cint =
     const EINVAL = cint(22)
-    if overwrite == 0 and c_getenv(cstring(name)) != nil: return 0
+    let wideName = newWideCString(name)
+    if overwrite == 0 and c_wgetenv(wideName) != nil:
+      return 0
+
     if value != "":
       let envstring = name & "=" & value
-      let e = c_putenv(cstring(envstring))
+      let e = c_wputenv(newWideCString(envstring))
       if e != 0:
         errno = EINVAL
         return -1
@@ -57,40 +60,44 @@ else:
     so we have to do these terrible things.
     ]#
     let envstring = name & "=  "
-    if c_putenv(cstring(envstring)) != 0:
+    if c_wputenv(newWideCString(envstring)) != 0:
       errno = EINVAL
       return -1
     # Here lies the documentation we blatently ignore to make this work.
-    var s = c_getenv(cstring(name))
-    s[0] = '\0'
+    var s = c_wgetenv(wideName)
+    s[0] = Utf16Char('\0')
     #[
     This would result in a double null termination, which normally signifies the
     end of the environment variable list, so we stick a completely empty
     environment variable into the list instead.
     ]#
-    s = c_getenv(cstring(name))
-    s[1] = '='
+    s = c_wgetenv(wideName)
+    s[1] = Utf16Char('=')
     #[
-    If gWenviron is null, the wide environment has not been initialized
+    If genviron is null, the MBCS environment has not been initialized
     yet, and we don't need to try to update it. We have to do this otherwise
-    we'd be forcing the initialization and maintenance of the wide environment
+    we'd be forcing the initialization and maintenance of the MBCS environment
     even though it's never actually used in most programs.
     ]#
-    if gWenviron != nil:
-      # var buf: array[MAX_ENV + 1, WideCString]
-      let requiredSize = mbstowcs(nil, cstring(name), 0).int
-      var buf = newSeq[Utf16Char](requiredSize + 1)
-      let buf2 = cast[ptr wchar_t](buf[0].addr)
-      if mbstowcs(buf2, cstring(name), csize_t(requiredSize + 1)) == csize_t(high(uint)):
-        errno = EINVAL
-        return -1
-      var ptrToEnv = cast[WideCString](c_wgetenv(buf2))
-      ptrToEnv[0] = '\0'.Utf16Char
-      ptrToEnv = cast[WideCString](c_wgetenv(buf2))
-      ptrToEnv[1] = '='.Utf16Char
+    if genviron != nil:
+
+      # wcstombs returns `high(csize_t)` if any characters cannot be represented
+      # in the current codepage. Skip updating MBCS environment in this case.
+      # For some reason, second `wcstombs` can find non-convertible characters
+      # that the first `wcstombs` cannot.
+      let requiredSizeS = wcstombs(nil, wideName, 0)
+      if requiredSizeS != high(csize_t):
+        let requiredSize = requiredSizeS.int
+        var buf = newSeq[char](requiredSize + 1)
+        let buf2 = buf[0].addr
+        if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) != high(csize_t):
+          var ptrToEnv = c_getenv(buf2)
+          ptrToEnv[0] = '\0'
+          ptrToEnv = c_getenv(buf2)
+          ptrToEnv[1] = '='
 
     # And now, we have to update the outer environment to have a proper empty value.
-    if setEnvironmentVariableA(cstring(name), cstring(value)) == 0:
+    if setEnvironmentVariableW(wideName, value.newWideCString) == 0:
       errno = EINVAL
       return -1
     return 0