fixes #11618 (#11969)

author: Andreas Rumpf <rumpf_a@web.de> 2019-08-17 21:19:57 +0200
committer: GitHub <noreply@github.com> 2019-08-17 21:19:57 +0200
commit: 7cb31455ee949ee006e2f314daa74866cf37a74e (patch)
tree: 641661998fac2b6a14873e6b661d5e48be61c502
parent: b68380f09b1926f4aea89b14b52bb442d5b002de (diff)
download: Nim-7cb31455ee949ee006e2f314daa74866cf37a74e.tar.gz
5 files changed, 44 insertions, 9 deletions
diff --git a/changelog.md b/changelog.md
index e662ea110..3780c270b 100644
--- a/changelog.md
+++ b/changelog.md
@@ -6,6 +6,9 @@
 - The switch ``-d:nimBinaryStdFiles`` does not exist anymore. Instead
   stdin/stdout/stderr are binary files again. This change only affects
   Windows.
+- On Windows console applications the code-page is set at program startup
+  to UTF-8. Use the new switch `-d:nimDontSetUtf8CodePage` to disable this
+  feature.
 
 ### Breaking changes in the standard library
 
@@ -15,6 +18,9 @@
 
 ## Library additions
 
+- `encodings.getCurrentEncoding` now distinguishes between the console's
+  encoding and the OS's encoding. This distinction is only meaningful on
+  Windows.
 
 ## Library changes
 
diff --git a/lib/pure/encodings.nim b/lib/pure/encodings.nim
index daf2ca0a2..874415bcf 100644
--- a/lib/pure/encodings.nim
+++ b/lib/pure/encodings.nim
@@ -231,6 +231,7 @@ when defined(windows):
     result = ""
 
   proc getACP(): CodePage {.stdcall, importc: "GetACP", dynlib: "kernel32".}
+  proc getGetConsoleCP(): CodePage {.stdcall, importc: "GetConsoleCP", dynlib: "kernel32".}
 
   proc multiByteToWideChar(
     codePage: CodePage,
@@ -292,10 +293,12 @@ else:
              outbuf: var cstring, outbytesLeft: var int): int {.
     importc: "iconv", importIconv.}
 
-proc getCurrentEncoding*(): string =
+proc getCurrentEncoding*(uiApp = false): string =
   ## retrieves the current encoding. On Unix, always "UTF-8" is returned.
+  ## The `uiApp` parameter is Windows specific. If true, the UI's code-page
+  ## is returned, if false, the Console's code-page is returned.
   when defined(windows):
-    result = codePageToName(getACP())
+    result = codePageToName(if uiApp: getACP() else: getGetConsoleCP())
   else:
     result = "UTF-8"
 
@@ -509,7 +512,7 @@ when not defined(testing) and isMainModule and defined(windows):
     let original = "\x42\x04\x35\x04\x41\x04\x42\x04" # utf-16 little endian test string "тест"
     let result = convert(original, "windows-1251", "utf-16")
     doAssert(result == "\xf2\xe5\xf1\xf2")
-  
+
   block should_convert_from_win1251_to_koi8r:
     let original = "\xf2\xe5\xf1\xf2" # win1251 test string "тест"
     let result = convert(original, "koi8-r", "windows-1251")
diff --git a/lib/system.nim b/lib/system.nim
index 850e03352..d582411d8 100644
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -4496,7 +4496,8 @@ proc substr*(s: string, first = 0): string =
 when defined(nimconfig):
   include "system/nimscript"
 
-when defined(windows) and appType == "console" and defined(nimSetUtf8CodePage) and not defined(nimscript):
+when defined(windows) and appType == "console" and
+    not defined(nimDontSetUtf8CodePage) and not defined(nimscript):
   proc setConsoleOutputCP(codepage: cint): cint {.stdcall, dynlib: "kernel32",
     importc: "SetConsoleOutputCP".}
   discard setConsoleOutputCP(65001) # 65001 - utf-8 codepage
diff --git a/lib/system/io.nim b/lib/system/io.nim
index ec7618f6d..7f7df74ba 100644
--- a/lib/system/io.nim
+++ b/lib/system/io.nim
@@ -194,9 +194,30 @@ proc writeChars*(f: File, a: openArray[char], start, len: Natural): int {.
   var x = cast[ptr UncheckedArray[int8]](a)
   result = writeBuffer(f, addr(x[int(start)]), len)
 
+when defined(windows):
+  proc writeWindows(f: File; s: string; doRaise = false) =
+    # Don't ask why but the 'printf' family of functions is the only thing
+    # that writes utf-8 strings reliably on Windows. At least on my Win 10
+    # machine. We also enable `setConsoleOutputCP(65001)` now by default.
+    # But we cannot call printf directly as the string might contain \0.
+    # So we have to loop over all the sections separated by potential \0s.
+    var i = c_fprintf(f, "%s", s)
+    while i < s.len: # "%s" stops at each \0, so continue behind it
+      if s[i] == '\0':
+        inc i # step over the embedded \0 itself (it is not written)
+      else:
+        let w = c_fprintf(f, "%s", unsafeAddr s[i])
+        if w <= 0:
+          if doRaise: raiseEIO("cannot write string to file")
+          break
+        inc i, w
+
 proc write*(f: File, s: string) {.tags: [WriteIOEffect], benign.} =
-  if writeBuffer(f, cstring(s), s.len) != s.len:
-    raiseEIO("cannot write string to file")
+  when defined(windows):
+    writeWindows(f, s, doRaise = true)
+  else:
+    if writeBuffer(f, cstring(s), s.len) != s.len:
+      raiseEIO("cannot write string to file")
 {.pop.}
 
 when NoFakeVars:
@@ -559,8 +580,11 @@ when declared(stdout):
     when defined(windows) and compileOption("threads"):
       acquireSys echoLock
     for s in args:
-      discard c_fwrite(s.cstring, s.len, 1, stdout)
-    const linefeed = "\n" # can be 1 or more chars
+      when defined(windows):
+        writeWindows(stdout, s)
+      else:
+        discard c_fwrite(s.cstring, s.len, 1, stdout)
+    const linefeed = "\n"
     discard c_fwrite(linefeed.cstring, linefeed.len, 1, stdout)
     discard c_fflush(stdout)
     when not defined(windows) and not defined(android) and not defined(nintendoswitch):
diff --git a/tests/exception/t9657.nim b/tests/exception/t9657.nim
index c96a0a597..0b6e128e0 100644
--- a/tests/exception/t9657.nim
+++ b/tests/exception/t9657.nim
@@ -5,4 +5,5 @@ discard """
 """
 # todo: remove `target: "c"` workaround once #10343 is properly fixed
 close stdmsg
-writeLine stdmsg, "exception!"
+const m = "exception!"
+discard writeBuffer(stdmsg, cstring(m), m.len)
author	Andreas Rumpf <rumpf_a@web.de>	2019-08-17 21:19:57 +0200
committer	GitHub <noreply@github.com>	2019-08-17 21:19:57 +0200
commit	7cb31455ee949ee006e2f314daa74866cf37a74e (patch)
tree	641661998fac2b6a14873e6b661d5e48be61c502
parent	b68380f09b1926f4aea89b14b52bb442d5b002de (diff)
download	Nim-7cb31455ee949ee006e2f314daa74866cf37a74e.tar.gz