diff options
author | havardjohn <havard.mjaavatten@outlook.com> | 2022-08-20 10:30:11 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-20 04:30:11 -0400 |
commit | f4bbf3bf0b5188352ee996831f921150760fb112 (patch) | |
tree | 8916efd708428672c22197f4170f0b5b60f601cf /lib/std/private | |
parent | 641381e3d47afba95f99efc77bb9a5ed65d07b3a (diff) | |
download | Nim-f4bbf3bf0b5188352ee996831f921150760fb112.tar.gz |
Add use of Windows Wide CRT API for env. vars (#20084)
* Add use of Windows Wide CRT API for env. vars Replaces use of CRT API `getenv` and `putenv` with respectively `_wgetenv` and `_wputenv`. Motivation is to reliably convert environment variables to UTF-8, and the wide API is best there, because it's reliably UTF-16. Changed the hack in `lib/std/private/win_setenv.nim` by switching the order of the Unicode and MBCS environment update; Unicode first, MBCS second. Because `_wgetenv`/`_wputenv` is now used, the Unicode environment will be initialized, so it should always be updated. Stop updating MBCS environment with the name of `getEnv`. It's not necessarily true that MBCS encoding and the `string` encoding is the same. Instead convert UTF-16 to current Windows code page with `wcstombs`, and use that string to update MBCS. Fixes regression in `6b3c77e` that caused `std/envvars.getEnv` or `std/os.getEnv` on Windows to return non-UTF-8 encoded strings. Add tests that test environment variables with Unicode characters in their name or value. * Fix test issues Fixes * `nim cpp` didn't compile the tests * Nimscript import of `tosenv.nim` from `test_nimscript.nims` failed with "cannot importc" * Fix missing error check on `wcstombs` * Fix ANSI testing errors * Separate ANSI-related testing to their own tests, and only executing them if running process has a specific code page * Setting locale with `setlocale` was not reliable and didn't work on certain machines * Add handling of a "no character representation" error in second `wcstombs` call * tests/newruntime_misc: Increment allocCount Increments overall allocations in `tnewruntime_misc` test. This is because `getEnv` now does an additional allocation: allocation of the UTF-16 string used as parameter to `c_wgetenv`. * Revert "tests/newruntime_misc: Increment allocCount" This reverts commit 4d4fe8bd3edb1bfc6d600f247af797c7552f5477. * tests/newruntime_misc: Increment allocCount on Windows Increments overall allocations in `tnewruntime_misc` test for Windows. This is because `getEnv` on Windows now does an additional allocation: allocation of the UTF-16 string used as parameter to `c_wgetenv`. * Refactor, adding suggestions from code review Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com> * Document, adding suggestions Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com> Co-authored-by: ringabout <43030857+ringabout@users.noreply.github.com> Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
Diffstat (limited to 'lib/std/private')
-rw-r--r-- | lib/std/private/win_setenv.nim | 65 |
1 files changed, 36 insertions, 29 deletions
diff --git a/lib/std/private/win_setenv.nim b/lib/std/private/win_setenv.nim index 0dfe0ed46..89bb0421f 100644 --- a/lib/std/private/win_setenv.nim +++ b/lib/std/private/win_setenv.nim @@ -25,27 +25,30 @@ when not defined(windows): discard else: type wchar_t {.importc: "wchar_t".} = int16 - proc setEnvironmentVariableA*(lpName, lpValue: cstring): int32 {. - stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableA", sideEffect.} + proc setEnvironmentVariableW*(lpName, lpValue: WideCString): int32 {. + stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableW", sideEffect.} # same as winlean.setEnvironmentVariableA - proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".} - proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".} - proc c_wgetenv(varname: ptr wchar_t): ptr wchar_t {.importc: "_wgetenv", header: "<stdlib.h>".} + proc c_getenv(varname: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".} + proc c_wputenv(envstring: WideCString): cint {.importc: "_wputenv", header: "<stdlib.h>".} + proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".} var errno {.importc, header: "<errno.h>".}: cint - var gWenviron {.importc: "_wenviron".}: ptr ptr wchar_t + var genviron {.importc: "_environ".}: ptr ptr char # xxx `ptr UncheckedArray[WideCString]` did not work - proc mbstowcs(wcstr: ptr wchar_t, mbstr: cstring, count: csize_t): csize_t {.importc: "mbstowcs", header: "<stdlib.h>".} + proc wcstombs(wcstr: ptr char, mbstr: WideCString, count: csize_t): csize_t {.importc, header: "<stdlib.h>".} # xxx cint vs errno_t? proc setEnvImpl*(name: string, value: string, overwrite: cint): cint = const EINVAL = cint(22) - if overwrite == 0 and c_getenv(cstring(name)) != nil: return 0 + let wideName = newWideCString(name) + if overwrite == 0 and c_wgetenv(wideName) != nil: + return 0 + if value != "": let envstring = name & "=" & value - let e = c_putenv(cstring(envstring)) + let e = c_wputenv(newWideCString(envstring)) if e != 0: errno = EINVAL return -1 @@ -57,40 +60,44 @@ else: so we have to do these terrible things. ]# let envstring = name & "= " - if c_putenv(cstring(envstring)) != 0: + if c_wputenv(newWideCString(envstring)) != 0: errno = EINVAL return -1 # Here lies the documentation we blatently ignore to make this work. - var s = c_getenv(cstring(name)) - s[0] = '\0' + var s = c_wgetenv(wideName) + s[0] = Utf16Char('\0') #[ This would result in a double null termination, which normally signifies the end of the environment variable list, so we stick a completely empty environment variable into the list instead. ]# - s = c_getenv(cstring(name)) - s[1] = '=' + s = c_wgetenv(wideName) + s[1] = Utf16Char('=') #[ - If gWenviron is null, the wide environment has not been initialized + If genviron is null, the MBCS environment has not been initialized yet, and we don't need to try to update it. We have to do this otherwise - we'd be forcing the initialization and maintenance of the wide environment + we'd be forcing the initialization and maintenance of the MBCS environment even though it's never actually used in most programs. ]# - if gWenviron != nil: - # var buf: array[MAX_ENV + 1, WideCString] - let requiredSize = mbstowcs(nil, cstring(name), 0).int - var buf = newSeq[Utf16Char](requiredSize + 1) - let buf2 = cast[ptr wchar_t](buf[0].addr) - if mbstowcs(buf2, cstring(name), csize_t(requiredSize + 1)) == csize_t(high(uint)): - errno = EINVAL - return -1 - var ptrToEnv = cast[WideCString](c_wgetenv(buf2)) - ptrToEnv[0] = '\0'.Utf16Char - ptrToEnv = cast[WideCString](c_wgetenv(buf2)) - ptrToEnv[1] = '='.Utf16Char + if genviron != nil: + + # wcstombs returns `high(csize_t)` if any characters cannot be represented + # in the current codepage. Skip updating MBCS environment in this case. + # For some reason, second `wcstombs` can find non-convertible characters + # that the first `wcstombs` cannot. + let requiredSizeS = wcstombs(nil, wideName, 0) + if requiredSizeS != high(csize_t): + let requiredSize = requiredSizeS.int + var buf = newSeq[char](requiredSize + 1) + let buf2 = buf[0].addr + if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) != high(csize_t): + var ptrToEnv = c_getenv(buf2) + ptrToEnv[0] = '\0' + ptrToEnv = c_getenv(buf2) + ptrToEnv[1] = '=' # And now, we have to update the outer environment to have a proper empty value. - if setEnvironmentVariableA(cstring(name), cstring(value)) == 0: + if setEnvironmentVariableW(wideName, value.newWideCString) == 0: errno = EINVAL return -1 return 0 |