summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
authorOscar NihlgÄrd <oscarnihlgard@gmail.com>2018-04-23 11:09:48 +0200
committerAndreas Rumpf <rumpf_a@web.de>2018-04-23 11:09:48 +0200
commit8caf257607e0b76f103155add7849fb934371678 (patch)
tree5c8dad266a3d174763f43070a7947dc0ee110b75 /lib
parent72dfe176f5211f561263984a2df653f16dcf5466 (diff)
downloadNim-8caf257607e0b76f103155add7849fb934371678.tar.gz
Don't escape multibyte characters (#7570)
Diffstat (limited to 'lib')
-rw-r--r--lib/pure/strutils.nim17
-rw-r--r--lib/system.nim42
2 files changed, 47 insertions, 12 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 849c16c34..cdc5ec4f9 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -1818,20 +1818,29 @@ proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect,
     dec(L)
 
 proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
-  rtl, extern: "nsuEscape".} =
+  rtl, extern: "nsuEscape", deprecated.} =
   ## Escapes a string `s`. See `system.addEscapedChar <system.html#addEscapedChar>`_
   ## for the escaping scheme.
   ##
   ## The resulting string is prefixed with `prefix` and suffixed with `suffix`.
   ## Both may be empty strings.
+  ##
+  ## **Warning:** This procedure is deprecated because it's to easy to missuse.
   result = newStringOfCap(s.len + s.len shr 2)
   result.add(prefix)
   for c in items(s):
-    result.addEscapedChar(c)
+    case c
+    of '\0'..'\31', '\127'..'\255':
+      add(result, "\\x")
+      add(result, toHex(ord(c), 2))
+    of '\\': add(result, "\\\\")
+    of '\'': add(result, "\\'")
+    of '\"': add(result, "\\\"")
+    else: add(result, c)
   add(result, suffix)
 
 proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
-  rtl, extern: "nsuUnescape".} =
+  rtl, extern: "nsuUnescape", deprecated.} =
   ## Unescapes a string `s`.
   ##
   ## This complements `escape <#escape>`_ as it performs the opposite
@@ -1839,6 +1848,8 @@ proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
   ##
   ## If `s` does not begin with ``prefix`` and end with ``suffix`` a
   ## ValueError exception will be raised.
+  ##
+  ## **Warning:** This procedure is deprecated because it's to easy to missuse.  
   result = newStringOfCap(s.len)
   var i = prefix.len
   if not s.startsWith(prefix):
diff --git a/lib/system.nim b/lib/system.nim
index 874b8c4d1..98c133428 100644
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -3933,29 +3933,48 @@ proc addEscapedChar*(s: var string, c: char) {.noSideEffect, inline.} =
   ## * replaces any ``\`` by ``\\``
   ## * replaces any ``'`` by ``\'``
   ## * replaces any ``"`` by ``\"``
-  ## * replaces any other character in the set ``{'\0'..'\31', '\127'..'\255'}``
+  ## * replaces any ``\a`` by ``\\a``
+  ## * replaces any ``\b`` by ``\\b``
+  ## * replaces any ``\t`` by ``\\t``
+  ## * replaces any ``\n`` by ``\\n``
+  ## * replaces any ``\v`` by ``\\v``
+  ## * replaces any ``\f`` by ``\\f``
+  ## * replaces any ``\c`` by ``\\c``
+  ## * replaces any ``\e`` by ``\\e``
+  ## * replaces any other character not in the set ``{'\21..'\126'}
   ##   by ``\xHH`` where ``HH`` is its hexadecimal value.
   ##
   ## The procedure has been designed so that its output is usable for many
   ## different common syntaxes.
   ## **Note**: This is not correct for producing Ansi C code!
   case c
-  of '\0'..'\31', '\127'..'\255':
-    add(s, "\\x")
+  of '\a': s.add "\\a" # \x07
+  of '\b': s.add "\\b" # \x08
+  of '\t': s.add "\\t" # \x09
+  of '\n': s.add "\\n" # \x0A
+  of '\v': s.add "\\v" # \x0B
+  of '\f': s.add "\\f" # \x0C
+  of '\c': s.add "\\c" # \x0D
+  of '\e': s.add "\\e" # \x1B
+  of '\\': s.add("\\\\")
+  of '\'': s.add("\\'")
+  of '\"': s.add("\\\"")
+  of {'\32'..'\126'} - {'\\', '\'', '\"'}: s.add(c)
+  else:
+    s.add("\\x")
     const HexChars = "0123456789ABCDEF"
     let n = ord(c)
     s.add(HexChars[int((n and 0xF0) shr 4)])
     s.add(HexChars[int(n and 0xF)])
-  of '\\': add(s, "\\\\")
-  of '\'': add(s, "\\'")
-  of '\"': add(s, "\\\"")
-  else: add(s, c)
 
 proc addQuoted*[T](s: var string, x: T) =
   ## Appends `x` to string `s` in place, applying quoting and escaping
   ## if `x` is a string or char. See
   ## `addEscapedChar <system.html#addEscapedChar>`_
-  ## for the escaping scheme.
+  ## for the escaping scheme. When `x` is a string, characters in the
+  ## range ``{\128..\255}`` are never escaped so that multibyte UTF-8
+  ## characters are untouched (note that this behavior is different from
+  ## ``addEscapedChar``).
   ##
   ## The Nim standard library uses this function on the elements of
   ## collections when producing a string representation of a collection.
@@ -3974,7 +3993,12 @@ proc addQuoted*[T](s: var string, x: T) =
   when T is string:
     s.add("\"")
     for c in x:
-      s.addEscapedChar(c)
+      # Only ASCII chars are escaped to avoid butchering
+      # multibyte UTF-8 characters.
+      if c <= 127.char:
+        s.addEscapedChar(c)
+      else:
+        s.add c
     s.add("\"")
   elif T is char:
     s.add("'")