3233 - change how Mu escapes strings

Thanks Sam Putman for helping think through this idea. When you encounter a backslash, strip it out and pass through any following run of backslashes. If we 'escaped' just a single following character, as C does, then the character '\' would be the same as: '\\' escaped once; '\\\\' escaped twice; '\\\\\\\\' escaped thrice (8 backslashes); and so on, the number of backslashes doubling each time. Instead, our approach is to make the character '\' the same as: '\\' escaped once; '\\\' escaped twice; '\\\\' escaped thrice; and so on, the number of backslashes merely increasing by one each time. This approach only works as long as backslashes aren't also overloaded to create special characters. So Mu doesn't follow C's approach of overloading backslashes both to escape quote characters and also as a notation for unprintable characters like '\n'.
author: Kartik K. Agaram <vc@akkartik.com> 2016-08-20 19:29:45 -0700
committer: Kartik K. Agaram <vc@akkartik.com> 2016-08-20 19:44:07 -0700
commit: 3369875ccd0e42a8639509a400d2d9cb1356d79a (patch)
tree: 20272a7d874d1d35f94aebe0c847a91e819dd9c4 /014literal_string.cc
parent: 18261f194d34d6d8aa2f359052796c4f6d373c1b (diff)
download: mu-3369875ccd0e42a8639509a400d2d9cb1356d79a.tar.gz
1 files changed, 26 insertions, 4 deletions
diff --git a/014literal_string.cc b/014literal_string.cc
index 421e873f..ec6a859f 100644
--- a/014literal_string.cc
+++ b/014literal_string.cc
@@ -64,7 +64,7 @@ void slurp_quoted_comment_oblivious(istream& in, ostream& out) {
   while (has_data(in)) {
     char c = in.get();
     if (c == '\\') {
-      out << static_cast<char>(in.get());
+      slurp_one_past_backslashes(in, out);
       continue;
     }
     out << c;
@@ -83,7 +83,7 @@ void slurp_quoted_comment_aware(istream& in, ostream& out) {
   char c;
   while (in >> c) {
     if (c == '\\') {
-      out << static_cast<char>(in.get());
+      slurp_one_past_backslashes(in, out);
       continue;
     }
     if (c == '#') {
@@ -145,6 +145,28 @@ void strip_last(string& s) {
   if (!s.empty()) s.erase(SIZE(s)-1);
 }
 
+void slurp_one_past_backslashes(istream& in, ostream& out) {
+  // Called right after the caller has read (and dropped) a backslash: copy
+  // the following run of backslashes, plus the one character that ends the
+  // run, from 'in' to 'out'. If we instead 'escaped' just a single following
+  // character, then the character '\' would be:
+  //   '\\' escaped once
+  //   '\\\\' escaped twice
+  //   '\\\\\\\\' escaped thrice (8 backslashes)
+  // ..and so on, doubling each time. With our approach it'll be:
+  //   '\\' escaped once
+  //   '\\\' escaped twice
+  //   '\\\\' escaped thrice
+  // ..growing by one each time. This only works as long as backslashes
+  // aren't also overloaded to create special characters, so Mu has no
+  // C-style backslash notation for unprintable characters like '\n'.
+  while (has_data(in)) {  // NOTE(review): presumably false once input is exhausted -- confirm has_data()
+    char c = in.get();
+    out << c;  // every character read here is passed through verbatim
+    if (c != '\\') break;  // first non-backslash ends the run; it has already been copied
+  }
+}
+
 :(scenario string_literal_nested)
 def main [
   1:address:array:character <- copy [abc [def]]
@@ -157,10 +179,10 @@ def main [
 ]
 +parse:   ingredient: {"abc [def": "literal-string"}
 
-:(scenario string_literal_escaped_comment_aware)
+:(scenario string_literal_escaped_twice)
 def main [
   1:address:array:character <- copy [
-abc \\\[def]
+abc \\[def]
 ]
 +parse:   ingredient: {"\nabc \[def": "literal-string"}
author	Kartik K. Agaram <vc@akkartik.com>	2016-08-20 19:29:45 -0700
committer	Kartik K. Agaram <vc@akkartik.com>	2016-08-20 19:44:07 -0700
commit	3369875ccd0e42a8639509a400d2d9cb1356d79a (patch)
tree	20272a7d874d1d35f94aebe0c847a91e819dd9c4 /014literal_string.cc
parent	18261f194d34d6d8aa2f359052796c4f6d373c1b (diff)
download	mu-3369875ccd0e42a8639509a400d2d9cb1356d79a.tar.gz