diff options
author | Oscar NihlgÄrd <oscarnihlgard@gmail.com> | 2018-04-23 11:09:48 +0200 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2018-04-23 11:09:48 +0200 |
commit | 8caf257607e0b76f103155add7849fb934371678 (patch) | |
tree | 5c8dad266a3d174763f43070a7947dc0ee110b75 /lib | |
parent | 72dfe176f5211f561263984a2df653f16dcf5466 (diff) | |
download | Nim-8caf257607e0b76f103155add7849fb934371678.tar.gz |
Don't escape multibyte characters (#7570)
Diffstat (limited to 'lib')
-rw-r--r-- | lib/pure/strutils.nim | 17 | ||||
-rw-r--r-- | lib/system.nim | 42 |
2 files changed, 47 insertions, 12 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 849c16c34..cdc5ec4f9 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -1818,20 +1818,29 @@ proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect, dec(L) proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, - rtl, extern: "nsuEscape".} = + rtl, extern: "nsuEscape", deprecated.} = ## Escapes a string `s`. See `system.addEscapedChar <system.html#addEscapedChar>`_ ## for the escaping scheme. ## ## The resulting string is prefixed with `prefix` and suffixed with `suffix`. ## Both may be empty strings. + ## + ## **Warning:** This procedure is deprecated because it's to easy to missuse. result = newStringOfCap(s.len + s.len shr 2) result.add(prefix) for c in items(s): - result.addEscapedChar(c) + case c + of '\0'..'\31', '\127'..'\255': + add(result, "\\x") + add(result, toHex(ord(c), 2)) + of '\\': add(result, "\\\\") + of '\'': add(result, "\\'") + of '\"': add(result, "\\\"") + else: add(result, c) add(result, suffix) proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, - rtl, extern: "nsuUnescape".} = + rtl, extern: "nsuUnescape", deprecated.} = ## Unescapes a string `s`. ## ## This complements `escape <#escape>`_ as it performs the opposite @@ -1839,6 +1848,8 @@ proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, ## ## If `s` does not begin with ``prefix`` and end with ``suffix`` a ## ValueError exception will be raised. + ## + ## **Warning:** This procedure is deprecated because it's to easy to missuse. result = newStringOfCap(s.len) var i = prefix.len if not s.startsWith(prefix): diff --git a/lib/system.nim b/lib/system.nim index 874b8c4d1..98c133428 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -3933,29 +3933,48 @@ proc addEscapedChar*(s: var string, c: char) {.noSideEffect, inline.} = ## * replaces any ``\`` by ``\\`` ## * replaces any ``'`` by ``\'`` ## * replaces any ``"`` by ``\"`` - ## * replaces any other character in the set ``{'\0'..'\31', '\127'..'\255'}`` + ## * replaces any ``\a`` by ``\\a`` + ## * replaces any ``\b`` by ``\\b`` + ## * replaces any ``\t`` by ``\\t`` + ## * replaces any ``\n`` by ``\\n`` + ## * replaces any ``\v`` by ``\\v`` + ## * replaces any ``\f`` by ``\\f`` + ## * replaces any ``\c`` by ``\\c`` + ## * replaces any ``\e`` by ``\\e`` + ## * replaces any other character not in the set ``{'\21..'\126'} ## by ``\xHH`` where ``HH`` is its hexadecimal value. ## ## The procedure has been designed so that its output is usable for many ## different common syntaxes. ## **Note**: This is not correct for producing Ansi C code! case c - of '\0'..'\31', '\127'..'\255': - add(s, "\\x") + of '\a': s.add "\\a" # \x07 + of '\b': s.add "\\b" # \x08 + of '\t': s.add "\\t" # \x09 + of '\n': s.add "\\n" # \x0A + of '\v': s.add "\\v" # \x0B + of '\f': s.add "\\f" # \x0C + of '\c': s.add "\\c" # \x0D + of '\e': s.add "\\e" # \x1B + of '\\': s.add("\\\\") + of '\'': s.add("\\'") + of '\"': s.add("\\\"") + of {'\32'..'\126'} - {'\\', '\'', '\"'}: s.add(c) + else: + s.add("\\x") const HexChars = "0123456789ABCDEF" let n = ord(c) s.add(HexChars[int((n and 0xF0) shr 4)]) s.add(HexChars[int(n and 0xF)]) - of '\\': add(s, "\\\\") - of '\'': add(s, "\\'") - of '\"': add(s, "\\\"") - else: add(s, c) proc addQuoted*[T](s: var string, x: T) = ## Appends `x` to string `s` in place, applying quoting and escaping ## if `x` is a string or char. See ## `addEscapedChar <system.html#addEscapedChar>`_ - ## for the escaping scheme. + ## for the escaping scheme. When `x` is a string, characters in the + ## range ``{\128..\255}`` are never escaped so that multibyte UTF-8 + ## characters are untouched (note that this behavior is different from + ## ``addEscapedChar``). ## ## The Nim standard library uses this function on the elements of ## collections when producing a string representation of a collection. @@ -3974,7 +3993,12 @@ proc addQuoted*[T](s: var string, x: T) = when T is string: s.add("\"") for c in x: - s.addEscapedChar(c) + # Only ASCII chars are escaped to avoid butchering + # multibyte UTF-8 characters. + if c <= 127.char: + s.addEscapedChar(c) + else: + s.add c s.add("\"") elif T is char: s.add("'") |