https://github.com/akkartik/mu/blob/master/038---literal_strings.cc
1
2
3
4
5
6 void test_transform_literal_string() {
7 run(
8 "== code 0x1\n"
9 "b8/copy \"test\"/imm32\n"
10 "== data 0x2000\n"
11 );
12 CHECK_TRACE_CONTENTS(
13 "transform: -- move literal strings to data segment\n"
14 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
15 "transform: line after transform: 'b8 __subx_global_1'\n"
16 );
17 }
18
19
20
21
22 :(after "Begin Transforms")
23
24 Transform.push_back(transform_literal_strings);
25
26
27 :(before "End Globals")
28 int Next_auto_global = 1;
29 :(before "End Reset")
30 Next_auto_global = 1;
31 :(code)
32 void transform_literal_strings(program& p) {
33 trace(3, "transform") << "-- move literal strings to data segment" << end();
34 if (p.segments.empty()) return;
35 vector<line> new_lines;
36 for (int s = 0; s < SIZE(p.segments); ++s) {
37 segment& seg = p.segments.at(s);
38 trace(99, "transform") << "segment '" << seg.name << "'" << end();
39 for (int i = 0; i < SIZE(seg.lines); ++i) {
40
41 line& line = seg.lines.at(i);
42 for (int j = 0; j < SIZE(line.words); ++j) {
43 word& curr = line.words.at(j);
44 if (curr.data.at(0) != '"') continue;
45 ostringstream global_name;
46 global_name << "__subx_global_" << Next_auto_global;
47 ++Next_auto_global;
48 add_global_to_data_segment(global_name.str(), curr, new_lines);
49 curr.data = global_name.str();
50 }
51 trace(99, "transform") << "line after transform: '" << data_to_string(line) << "'" << end();
52 }
53 }
54 segment* data = find(p, "data");
55 if (data)
56 data->lines.insert(data->lines.end(), new_lines.begin(), new_lines.end());
57 }
58
59 void add_global_to_data_segment(const string& name, const word& value, vector<line>& out) {
60 trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
61
62 out.push_back(label(name));
63
64 out.push_back(line());
65 emit_hex_bytes(out.back(), SIZE(value.data)-2, 4);
66
67 out.push_back(line());
68 line& curr = out.back();
69 for (int i = 1; i < SIZE(value.data)-1; ++i) {
70 char c = value.data.at(i);
71 curr.words.push_back(word());
72 curr.words.back().data = hex_byte_to_string(c);
73 curr.words.back().metadata.push_back(string(1, c));
74 }
75 }
76
77
78
79
80 void test_instruction_with_string_literal() {
81 parse_instruction_character_by_character(
82 "a \"abc def\" z\n"
83 );
84 CHECK_TRACE_CONTENTS(
85 "parse2: word: a\n"
86 "parse2: word: \"abc def\"\n"
87 "parse2: word: z\n"
88 );
89
90 CHECK_TRACE_COUNT("parse2", 3);
91 }
92
93 void test_string_literal_in_data_segment() {
94 run(
95 "== code 0x1\n"
96 "b8/copy X/imm32\n"
97 "== data 0x2000\n"
98 "X:\n"
99 "\"test\"/imm32\n"
100 );
101 CHECK_TRACE_CONTENTS(
102 "transform: -- move literal strings to data segment\n"
103 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
104 "transform: line after transform: '__subx_global_1'\n"
105 );
106 }
107
108 :(before "End Line Parsing Special-cases(line_data -> l)")
109 if (line_data.find('"') != string::npos) {
110 parse_instruction_character_by_character(line_data, l);
111 continue;
112 }
113
114 :(code)
115 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
116 if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) {
117 raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
118 return;
119 }
120
121 istringstream in(line_data);
122 in >> std::noskipws;
123 line result;
124 result.original = line_data;
125
126 while (has_data(in)) {
127 skip_whitespace(in);
128 if (!has_data(in)) break;
129 char c = in.get();
130 if (c == '#') break;
131 if (c == ':') break;
132 if (c == '.') {
133 if (!has_data(in)) break;
134 if (isspace(in.peek()))
135 continue;
136 }
137 result.words.push_back(word());
138 if (c == '"') {
139
140 ostringstream d;
141 d << c;
142 while (has_data(in)) {
143 in >> c;
144 if (c == '\\') {
145 in >> c;
146 if (c == 'n') d << '\n';
147 else if (c == '"') d << '"';
148 else if (c == '\\') d << '\\';
149 else {
150 raise << "parse_instruction_character_by_character: unknown escape sequence '\\" << c << "'\n" << end();
151 return;
152 }
153 continue;
154 } else {
155 d << c;
156 }
157 if (c == '"') break;
158 }
159 result.words.back().data = d.str();
160 result.words.back().original = d.str();
161
162 ostringstream m;
163 while (!isspace(in.peek()) && has_data(in)) {
164 in >> c;
165 if (c == '/') {
166 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
167 m.str("");
168 }
169 else {
170 m << c;
171 }
172 }
173 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
174 }
175 else {
176
177 ostringstream w;
178 w << c;
179 while (!isspace(in.peek()) && has_data(in)) {
180 in >> c;
181 w << c;
182 }
183 parse_word(w.str(), result.words.back());
184 }
185 trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
186 }
187 if (!result.words.empty())
188 out.push_back(result);
189 }
190
191 void skip_whitespace(istream& in) {
192 while (true) {
193 if (has_data(in) && isspace(in.peek())) in.get();
194 else break;
195 }
196 }
197
198 void skip_comment(istream& in) {
199 if (has_data(in) && in.peek() == '#') {
200 in.get();
201 while (has_data(in) && in.peek() != '\n') in.get();
202 }
203 }
204
205 line label(string s) {
206 line result;
207 result.words.push_back(word());
208 result.words.back().data = (s+":");
209 return result;
210 }
211
212
213 void parse_instruction_character_by_character(const string& line_data) {
214 vector<line> out;
215 parse_instruction_character_by_character(line_data, out);
216 }
217
218 void test_parse2_comment_token_in_middle() {
219 parse_instruction_character_by_character(
220 "a . z\n"
221 );
222 CHECK_TRACE_CONTENTS(
223 "parse2: word: a\n"
224 "parse2: word: z\n"
225 );
226 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
227
228 CHECK_TRACE_COUNT("parse2", 2);
229 }
230
231 void test_parse2_word_starting_with_dot() {
232 parse_instruction_character_by_character(
233 "a .b c\n"
234 );
235 CHECK_TRACE_CONTENTS(
236 "parse2: word: a\n"
237 "parse2: word: .b\n"
238 "parse2: word: c\n"
239 );
240 }
241
242 void test_parse2_comment_token_at_start() {
243 parse_instruction_character_by_character(
244 ". a b\n"
245 );
246 CHECK_TRACE_CONTENTS(
247 "parse2: word: a\n"
248 "parse2: word: b\n"
249 );
250 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
251 }
252
253 void test_parse2_comment_token_at_end() {
254 parse_instruction_character_by_character(
255 "a b .\n"
256 );
257 CHECK_TRACE_CONTENTS(
258 "parse2: word: a\n"
259 "parse2: word: b\n"
260 );
261 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
262 }
263
264 void test_parse2_word_starting_with_dot_at_start() {
265 parse_instruction_character_by_character(
266 ".a b c\n"
267 );
268 CHECK_TRACE_CONTENTS(
269 "parse2: word: .a\n"
270 "parse2: word: b\n"
271 "parse2: word: c\n"
272 );
273 }
274
275 void test_parse2_metadata() {
276 parse_instruction_character_by_character(
277 ".a b/c d\n"
278 );
279 CHECK_TRACE_CONTENTS(
280 "parse2: word: .a\n"
281 "parse2: word: b /c\n"
282 "parse2: word: d\n"
283 );
284 }
285
286 void test_parse2_string_with_metadata() {
287 parse_instruction_character_by_character(
288 "a \"bc def\"/disp32 g\n"
289 );
290 CHECK_TRACE_CONTENTS(
291 "parse2: word: a\n"
292 "parse2: word: \"bc def\" /disp32\n"
293 "parse2: word: g\n"
294 );
295 }
296
297 void test_parse2_string_with_metadata_at_end() {
298 parse_instruction_character_by_character(
299 "a \"bc def\"/disp32\n"
300 );
301 CHECK_TRACE_CONTENTS(
302 "parse2: word: a\n"
303 "parse2: word: \"bc def\" /disp32\n"
304 );
305 }
306
307 void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
308 parse_instruction_character_by_character(
309 "68/push \"test\"/f"
310 );
311 CHECK_TRACE_CONTENTS(
312 "parse2: word: 68 /push\n"
313 "parse2: word: \"test\" /f\n"
314 );
315 }
316
317
318
319
320 void test_parse2_string_containing_slashes() {
321 parse_instruction_character_by_character(
322 "a \"bc/def\"/disp32\n"
323 );
324 CHECK_TRACE_CONTENTS(
325 "parse2: word: \"bc/def\" /disp32\n"
326 );
327 }
328
329 void test_instruction_with_string_literal_with_escaped_quote() {
330 parse_instruction_character_by_character(
331 "\"a\\\"b\"\n" // escaped quote inside string
332 );
333 CHECK_TRACE_CONTENTS(
334 "parse2: word: \"a\"b\"\n"
335 );
336
337 CHECK_TRACE_COUNT("parse2", 1);
338 }
339
340 void test_instruction_with_string_literal_with_escaped_backslash() {
341 parse_instruction_character_by_character(
342 "\"a\\\\b\"\n"
343 );
344 CHECK_TRACE_CONTENTS(
345 "parse2: word: \"a\\b\"\n"
346 );
347
348 CHECK_TRACE_COUNT("parse2", 1);
349 }