about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Kartik K. Agaram <vc@akkartik.com> 2016-08-20 19:29:45 -0700
committer: Kartik K. Agaram <vc@akkartik.com> 2016-08-20 19:44:07 -0700
commit: 3369875ccd0e42a8639509a400d2d9cb1356d79a (patch)
tree: 20272a7d874d1d35f94aebe0c847a91e819dd9c4
parent: 18261f194d34d6d8aa2f359052796c4f6d373c1b (diff)
download: mu-3369875ccd0e42a8639509a400d2d9cb1356d79a.tar.gz
3233 - change how Mu escapes strings
Thanks Sam Putman for helping think through this idea.

When you encounter a backslash, strip it out and pass through any
following run of backslashes, plus the one character after the run. If we
instead 'escaped' only the single following character, as C does, then
the character '\' would be the same as:

  '\\' escaped once
  '\\\\' escaped twice
  '\\\\\\\\' escaped thrice (8 backslashes)

..and so on, the number of backslashes doubling each time. Instead, our
approach is to make the character '\' the same as:

  '\\' escaped once
  '\\\' escaped twice
  '\\\\' escaped thrice

..and so on, the number of backslashes merely increasing by one each
time.

This approach only works as long as backslashes aren't also overloaded
to create special characters. So Mu doesn't follow C's approach of
overloading backslashes both to escape quote characters and also as a
notation for unprintable characters like '\n'.
-rw-r--r-- 014literal_string.cc 30
-rw-r--r-- 089scenario_filesystem.cc 8
-rw-r--r-- edit/011-errors.mu 7
-rw-r--r-- sandbox/011-errors.mu 7
4 files changed, 34 insertions, 18 deletions
diff --git a/014literal_string.cc b/014literal_string.cc
index 421e873f..ec6a859f 100644
--- a/014literal_string.cc
+++ b/014literal_string.cc
@@ -64,7 +64,7 @@ void slurp_quoted_comment_oblivious(istream& in, ostream& out) {
   while (has_data(in)) {
     char c = in.get();
     if (c == '\\') {
-      out << static_cast<char>(in.get());
+      slurp_one_past_backslashes(in, out);
       continue;
     }
     out << c;
@@ -83,7 +83,7 @@ void slurp_quoted_comment_aware(istream& in, ostream& out) {
   char c;
   while (in >> c) {
     if (c == '\\') {
-      out << static_cast<char>(in.get());
+      slurp_one_past_backslashes(in, out);
       continue;
     }
     if (c == '#') {
@@ -145,6 +145,28 @@ void strip_last(string& s) {
   if (!s.empty()) s.erase(SIZE(s)-1);
 }
 
+void slurp_one_past_backslashes(istream& in, ostream& out) {
+  // Called right after a backslash was read and dropped: copy any following
+  // run of backslashes, and the one character after it, from 'in' to 'out'.
+  // If we 'escaped' just a single following character, '\' would be:
+  //   '\\' escaped once
+  //   '\\\\' escaped twice
+  //   '\\\\\\\\' escaped thrice (8 backslashes)
+  // ..and so on, doubling each time. With our approach it'll be:
+  //   '\\' escaped once
+  //   '\\\' escaped twice
+  //   '\\\\' escaped thrice
+  // This only works as long as backslashes aren't also overloaded to create
+  // special characters. So Mu doesn't follow C's approach of overloading
+  // backslashes both to escape quote characters and also as a notation for
+  // unprintable characters like '\n'.
+  while (has_data(in)) {
+    char c = in.get();
+    out << c;  // every character read here is emitted, backslash or not
+    if (c != '\\') break;  // stop once we're one character past the run
+  }
+}
+
 :(scenario string_literal_nested)
 def main [
   1:address:array:character <- copy [abc [def]]
@@ -157,10 +179,10 @@ def main [
 ]
 +parse:   ingredient: {"abc [def": "literal-string"}
 
-:(scenario string_literal_escaped_comment_aware)
+:(scenario string_literal_escaped_twice)
 def main [
   1:address:array:character <- copy [
-abc \\\[def]
+abc \\[def]
 ]
 +parse:   ingredient: {"\nabc \[def": "literal-string"}
 
diff --git a/089scenario_filesystem.cc b/089scenario_filesystem.cc
index 120d30b7..8acdac0b 100644
--- a/089scenario_filesystem.cc
+++ b/089scenario_filesystem.cc
@@ -54,13 +54,9 @@ scenario assume-filesystem [
   local-scope
   assume-filesystem [
     # file 'a' containing a '|'
-    # ugly as hell that this requires 8 (!) backslashes for 3 '[' block escapes
-    # todo: use Sam Putman's idea to change the delimitors for the '[' blocks
-    # to:
-    #   [''[ ... ['[ ... [ ... ] ... ]'] ... ]'']
-    # That way we'd need just a single backslash -- to escape the |...| environment.
+    # need to escape '\' once for each block
     [a] <- [
-      |x\\\\\\\\|yz|
+      |x\\\\|yz|
     ]
   ]
   data:address:array:file-mapping <- get *filesystem:address:filesystem, data:offset
diff --git a/edit/011-errors.mu b/edit/011-errors.mu
index 080d5d37..774aedb1 100644
--- a/edit/011-errors.mu
+++ b/edit/011-errors.mu
@@ -377,10 +377,9 @@ scenario run-shows-unbalanced-bracket-errors [
   assume-screen 100/width, 15/height
   # recipe is incomplete (unbalanced '[')
   1:address:array:character <- new [ 
-recipe foo «
+recipe foo \\[
   x <- copy 0
 ]
-  replace 1:address:array:character, 171/«, 91  # '['
   2:address:array:character <- new [foo]
   3:address:programming-environment-data <- new-programming-environment screen:address:screen, 1:address:array:character, 2:address:array:character
   assume-console [
@@ -392,10 +391,10 @@ recipe foo «
   screen-should-contain [
     .  errors found                                                                   run (F4)           .
     .                                                  ┊foo                                              .
-    .recipe foo \\\[                                      ┊━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━.
+    .recipe foo \\[                                      ┊━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━.
     .  x <- copy 0                                     ┊                                                 .
     .                                                  ┊                                                 .
-    .9: unbalanced '\\\[' for recipe                      ┊                                                 .
+    .9: unbalanced '\\[' for recipe                      ┊                                                 .
     .┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┊                                                 .
     .                                                  ┊                                                 .
   ]
diff --git a/sandbox/011-errors.mu b/sandbox/011-errors.mu
index e326e446..f2652aaa 100644
--- a/sandbox/011-errors.mu
+++ b/sandbox/011-errors.mu
@@ -362,10 +362,9 @@ scenario run-shows-unbalanced-bracket-errors [
   assume-screen 50/width, 20/height
   # recipe is incomplete (unbalanced '[')
   1:address:array:character <- new [ 
-recipe foo «
+recipe foo \\[
   x <- copy 0
 ]
-  replace 1:address:array:character, 171/«, 91  # '['
   2:address:array:character <- new [foo]
   3:address:programming-environment-data <- new-programming-environment screen:address:screen, 2:address:array:character
   assume-console [
@@ -380,8 +379,8 @@ recipe foo «
     .━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━.
     .0   edit           copy           delete          .
     .foo                                               .
-    .9: unbalanced '\\\[' for recipe                      .
-    .9: unbalanced '\\\[' for recipe                      .
+    .9: unbalanced '\\[' for recipe                      .
+    .9: unbalanced '\\[' for recipe                      .
     .━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━.
     .                                                  .
   ]
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
8b1da9bb ^
cf1ddc41 ^
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164