https://github.com/akkartik/mu/blob/main/038literal_strings.cc
1
2
3
4
5
6 void test_transform_literal_string() {
7 run(
8 "== code 0x1\n"
9 "b8/copy \"test\"/imm32\n"
10 "== data 0x2000\n"
11 );
12 CHECK_TRACE_CONTENTS(
13 "transform: -- move literal strings to data segment\n"
14 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
15 "transform: line after transform: 'b8 __subx_global_1'\n"
16 );
17 }
18
19
20
21
22 :(after "Begin Transforms")
23 Transform.push_back(transform_literal_strings);
24
25 :(before "End Globals")
26 int Next_auto_global = 1;
27 :(before "End Reset")
28 Next_auto_global = 1;
29 :(code)
30 void transform_literal_strings(program& p) {
31 trace(3, "transform") << "-- move literal strings to data segment" << end();
32 if (p.segments.empty()) return;
33 vector<line> new_lines;
34 for (int s = 0; s < SIZE(p.segments); ++s) {
35 segment& seg = p.segments.at(s);
36 trace(99, "transform") << "segment '" << seg.name << "'" << end();
37 for (int i = 0; i < SIZE(seg.lines); ++i) {
38
39 line& line = seg.lines.at(i);
40 for (int j = 0; j < SIZE(line.words); ++j) {
41 word& curr = line.words.at(j);
42 if (curr.data.at(0) != '"') continue;
43 ostringstream global_name;
44 global_name << "__subx_global_" << Next_auto_global;
45 ++Next_auto_global;
46 add_global_to_data_segment(global_name.str(), curr, new_lines);
47 curr.data = global_name.str();
48 }
49 trace(99, "transform") << "line after transform: '" << data_to_string(line) << "'" << end();
50 }
51 }
52 segment* data = find(p, "data");
53 if (data)
54 data->lines.insert(data->lines.end(), new_lines.begin(), new_lines.end());
55 }
56
57 void add_global_to_data_segment(const string& name, const word& value, vector<line>& out) {
58 trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
59
60 out.push_back(label(name));
61
62 out.push_back(line());
63 emit_hex_bytes(out.back(), SIZE(value.data)-2, 4);
64
65 out.push_back(line());
66 line& curr = out.back();
67 for (int i = 1; i < SIZE(value.data)-1; ++i) {
68 char c = value.data.at(i);
69 curr.words.push_back(word());
70 curr.words.back().data = hex_byte_to_string(c);
71 curr.words.back().metadata.push_back(string(1, c));
72 }
73 }
74
75
76
77
78 void test_instruction_with_string_literal() {
79 parse_instruction_character_by_character(
80 "a \"abc def\" z\n"
81 );
82 CHECK_TRACE_CONTENTS(
83 "parse2: word: a\n"
84 "parse2: word: \"abc def\"\n"
85 "parse2: word: z\n"
86 );
87
88 CHECK_TRACE_COUNT("parse2", 3);
89 }
90
91 void test_string_literal_in_data_segment() {
92 run(
93 "== code 0x1\n"
94 "b8/copy X/imm32\n"
95 "== data 0x2000\n"
96 "X:\n"
97 "\"test\"/imm32\n"
98 );
99 CHECK_TRACE_CONTENTS(
100 "transform: -- move literal strings to data segment\n"
101 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
102 "transform: line after transform: '__subx_global_1'\n"
103 );
104 }
105
106 void test_string_literal_with_missing_quote() {
107 Hide_errors = true;
108 run(
109 "== code 0x1\n"
110 "b8/copy \"test/imm32\n"
111 "== data 0x2000\n"
112 );
113 CHECK_TRACE_CONTENTS(
114 "error: unclosed string in: b8/copy \"test/imm32"
115 );
116 }
117
118 :(before "End Line Parsing Special-cases(line_data -> l)")
119 if (line_data.find('"') != string::npos) {
120 parse_instruction_character_by_character(line_data, l);
121 continue;
122 }
123
124 :(code)
125 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
126 if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) {
127 raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
128 return;
129 }
130
131 istringstream in(line_data);
132 in >> std::noskipws;
133 line result;
134 result.original = line_data;
135
136 while (has_data(in)) {
137 skip_whitespace(in);
138 if (!has_data(in)) break;
139 char c = in.get();
140 if (c == '#') break;
141 if (c == ':') break;
142 if (c == '.') {
143 if (!has_data(in)) break;
144 if (isspace(in.peek()))
145 continue;
146 }
147 result.words.push_back(word());
148 if (c == '"') {
149
150 ostringstream d;
151 d << c;
152 while (true) {
153 if (!has_data(in)) {
154 raise << "unclosed string in: " << line_data << end();
155 return;
156 }
157 in >> c;
158 if (c == '\\') {
159 in >> c;
160 if (c == 'n') d << '\n';
161 else if (c == '"') d << '"';
162 else if (c == '\\') d << '\\';
163 else {
164 raise << "parse_instruction_character_by_character: unknown escape sequence '\\" << c << "'\n" << end();
165 return;
166 }
167 continue;
168 } else {
169 d << c;
170 }
171 if (c == '"') break;
172 }
173 result.words.back().data = d.str();
174 result.words.back().original = d.str();
175
176 ostringstream m;
177 while (!isspace(in.peek()) && has_data(in)) {
178 in >> c;
179 if (c == '/') {
180 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
181 m.str("");
182 }
183 else {
184 m << c;
185 }
186 }
187 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
188 }
189 else {
190
191 ostringstream w;
192 w << c;
193 while (!isspace(in.peek()) && has_data(in)) {
194 in >> c;
195 w << c;
196 }
197 parse_word(w.str(), result.words.back());
198 }
199 trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
200 }
201 if (!result.words.empty())
202 out.push_back(result);
203 }
204
205 void skip_whitespace(istream& in) {
206 while (has_data(in) && isspace(in.peek())) {
207 in.get();
208 }
209 }
210
211 void skip_comment(istream& in) {
212 if (has_data(in) && in.peek() == '#') {
213 in.get();
214 while (has_data(in) && in.peek() != '\n') in.get();
215 }
216 }
217
218 line label(string s) {
219 line result;
220 result.words.push_back(word());
221 result.words.back().data = (s+":");
222 return result;
223 }
224
225
226 void parse_instruction_character_by_character(const string& line_data) {
227 vector<line> out;
228 parse_instruction_character_by_character(line_data, out);
229 }
230
231 void test_parse2_comment_token_in_middle() {
232 parse_instruction_character_by_character(
233 "a . z\n"
234 );
235 CHECK_TRACE_CONTENTS(
236 "parse2: word: a\n"
237 "parse2: word: z\n"
238 );
239 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
240
241 CHECK_TRACE_COUNT("parse2", 2);
242 }
243
244 void test_parse2_word_starting_with_dot() {
245 parse_instruction_character_by_character(
246 "a .b c\n"
247 );
248 CHECK_TRACE_CONTENTS(
249 "parse2: word: a\n"
250 "parse2: word: .b\n"
251 "parse2: word: c\n"
252 );
253 }
254
255 void test_parse2_comment_token_at_start() {
256 parse_instruction_character_by_character(
257 ". a b\n"
258 );
259 CHECK_TRACE_CONTENTS(
260 "parse2: word: a\n"
261 "parse2: word: b\n"
262 );
263 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
264 }
265
266 void test_parse2_comment_token_at_end() {
267 parse_instruction_character_by_character(
268 "a b .\n"
269 );
270 CHECK_TRACE_CONTENTS(
271 "parse2: word: a\n"
272 "parse2: word: b\n"
273 );
274 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
275 }
276
277 void test_parse2_word_starting_with_dot_at_start() {
278 parse_instruction_character_by_character(
279 ".a b c\n"
280 );
281 CHECK_TRACE_CONTENTS(
282 "parse2: word: .a\n"
283 "parse2: word: b\n"
284 "parse2: word: c\n"
285 );
286 }
287
288 void test_parse2_metadata() {
289 parse_instruction_character_by_character(
290 ".a b/c d\n"
291 );
292 CHECK_TRACE_CONTENTS(
293 "parse2: word: .a\n"
294 "parse2: word: b /c\n"
295 "parse2: word: d\n"
296 );
297 }
298
299 void test_parse2_string_with_metadata() {
300 parse_instruction_character_by_character(
301 "a \"bc def\"/disp32 g\n"
302 );
303 CHECK_TRACE_CONTENTS(
304 "parse2: word: a\n"
305 "parse2: word: \"bc def\" /disp32\n"
306 "parse2: word: g\n"
307 );
308 }
309
310 void test_parse2_string_with_metadata_at_end() {
311 parse_instruction_character_by_character(
312 "a \"bc def\"/disp32\n"
313 );
314 CHECK_TRACE_CONTENTS(
315 "parse2: word: a\n"
316 "parse2: word: \"bc def\" /disp32\n"
317 );
318 }
319
320 void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
321 parse_instruction_character_by_character(
322 "68/push \"test\"/f"
323 );
324 CHECK_TRACE_CONTENTS(
325 "parse2: word: 68 /push\n"
326 "parse2: word: \"test\" /f\n"
327 );
328 }
329
330
331
332
333 void test_parse2_string_containing_slashes() {
334 parse_instruction_character_by_character(
335 "a \"bc/def\"/disp32\n"
336 );
337 CHECK_TRACE_CONTENTS(
338 "parse2: word: \"bc/def\" /disp32\n"
339 );
340 }
341
342 void test_instruction_with_string_literal_with_escaped_quote() {
343 parse_instruction_character_by_character(
344 "\"a\\\"b\"\n" // escaped quote inside string
345 );
346 CHECK_TRACE_CONTENTS(
347 "parse2: word: \"a\"b\"\n"
348 );
349
350 CHECK_TRACE_COUNT("parse2", 1);
351 }
352
353 void test_instruction_with_string_literal_with_escaped_backslash() {
354 parse_instruction_character_by_character(
355 "\"a\\\\b\"\n"
356 );
357 CHECK_TRACE_CONTENTS(
358 "parse2: word: \"a\\b\"\n"
359 );
360
361 CHECK_TRACE_COUNT("parse2", 1);
362 }