https://github.com/akkartik/mu/blob/master/038---literal_strings.cc
1
2
3
4
5
6 void test_transform_literal_string() {
7 run(
8 "== code 0x1\n"
9 "b8/copy \"test\"/imm32\n"
10 "== data 0x2000\n"
11 );
12 CHECK_TRACE_CONTENTS(
13 "transform: -- move literal strings to data segment\n"
14 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
15 "transform: line after transform: 'b8 __subx_global_1'\n"
16 );
17 }
18
19
20
21
22 :(after "Begin Transforms")
// Add transform_literal_strings (defined below) to the list of transforms.
23 Transform.push_back(transform_literal_strings);
24
25 :(before "End Globals")
// Numeric suffix for the next generated '__subx_global_<n>' name; the first
// generated global gets suffix 1.
26 int Next_auto_global = 1;
27 :(before "End Reset")
// Restart the numbering at 1 (spliced in before the "End Reset" marker).
28 Next_auto_global = 1;
29 :(code)
30 void transform_literal_strings(program& p) {
31 trace(3, "transform") << "-- move literal strings to data segment" << end();
32 if (p.segments.empty()) return;
33 vector<line> new_lines;
34 for (int s = 0; s < SIZE(p.segments); ++s) {
35 segment& seg = p.segments.at(s);
36 trace(99, "transform") << "segment '" << seg.name << "'" << end();
37 for (int i = 0; i < SIZE(seg.lines); ++i) {
38
39 line& line = seg.lines.at(i);
40 for (int j = 0; j < SIZE(line.words); ++j) {
41 word& curr = line.words.at(j);
42 if (curr.data.at(0) != '"') continue;
43 ostringstream global_name;
44 global_name << "__subx_global_" << Next_auto_global;
45 ++Next_auto_global;
46 add_global_to_data_segment(global_name.str(), curr, new_lines);
47 curr.data = global_name.str();
48 }
49 trace(99, "transform") << "line after transform: '" << data_to_string(line) << "'" << end();
50 }
51 }
52 segment* data = find(p, "data");
53 if (data)
54 data->lines.insert(data->lines.end(), new_lines.begin(), new_lines.end());
55 }
56
57 void add_global_to_data_segment(const string& name, const word& value, vector<line>& out) {
58 trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
59
60 out.push_back(label(name));
61
62 out.push_back(line());
63 emit_hex_bytes(out.back(), SIZE(value.data)-2, 4);
64
65 out.push_back(line());
66 line& curr = out.back();
67 for (int i = 1; i < SIZE(value.data)-1; ++i) {
68 char c = value.data.at(i);
69 curr.words.push_back(word());
70 curr.words.back().data = hex_byte_to_string(c);
71 curr.words.back().metadata.push_back(string(1, c));
72 }
73 }
74
75
76
77
78 void test_instruction_with_string_literal() {
79 parse_instruction_character_by_character(
80 "a \"abc def\" z\n"
81 );
82 CHECK_TRACE_CONTENTS(
83 "parse2: word: a\n"
84 "parse2: word: \"abc def\"\n"
85 "parse2: word: z\n"
86 );
87
88 CHECK_TRACE_COUNT("parse2", 3);
89 }
90
91 void test_string_literal_in_data_segment() {
92 run(
93 "== code 0x1\n"
94 "b8/copy X/imm32\n"
95 "== data 0x2000\n"
96 "X:\n"
97 "\"test\"/imm32\n"
98 );
99 CHECK_TRACE_CONTENTS(
100 "transform: -- move literal strings to data segment\n"
101 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
102 "transform: line after transform: '__subx_global_1'\n"
103 );
104 }
105
106 :(before "End Line Parsing Special-cases(line_data -> l)")
// A line containing a double quote anywhere may hold a string literal, so it
// is parsed character by character into 'l'. The 'continue' then skips
// whatever follows this splice point for that line.
107 if (line_data.find('"') != string::npos) {
108 parse_instruction_character_by_character(line_data, l);
109 continue;
110 }
111
112 :(code)
113 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
114 if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) {
115 raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
116 return;
117 }
118
119 istringstream in(line_data);
120 in >> std::noskipws;
121 line result;
122 result.original = line_data;
123
124 while (has_data(in)) {
125 skip_whitespace(in);
126 if (!has_data(in)) break;
127 char c = in.get();
128 if (c == '#') break;
129 if (c == ':') break;
130 if (c == '.') {
131 if (!has_data(in)) break;
132 if (isspace(in.peek()))
133 continue;
134 }
135 result.words.push_back(word());
136 if (c == '"') {
137
138 ostringstream d;
139 d << c;
140 while (has_data(in)) {
141 in >> c;
142 if (c == '\\') {
143 in >> c;
144 if (c == 'n') d << '\n';
145 else if (c == '"') d << '"';
146 else if (c == '\\') d << '\\';
147 else {
148 raise << "parse_instruction_character_by_character: unknown escape sequence '\\" << c << "'\n" << end();
149 return;
150 }
151 continue;
152 } else {
153 d << c;
154 }
155 if (c == '"') break;
156 }
157 result.words.back().data = d.str();
158 result.words.back().original = d.str();
159
160 ostringstream m;
161 while (!isspace(in.peek()) && has_data(in)) {
162 in >> c;
163 if (c == '/') {
164 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
165 m.str("");
166 }
167 else {
168 m << c;
169 }
170 }
171 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
172 }
173 else {
174
175 ostringstream w;
176 w << c;
177 while (!isspace(in.peek()) && has_data(in)) {
178 in >> c;
179 w << c;
180 }
181 parse_word(w.str(), result.words.back());
182 }
183 trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
184 }
185 if (!result.words.empty())
186 out.push_back(result);
187 }
188
189 void skip_whitespace(istream& in) {
190 while (has_data(in) && isspace(in.peek())) {
191 in.get();
192 }
193 }
194
195 void skip_comment(istream& in) {
196 if (has_data(in) && in.peek() == '#') {
197 in.get();
198 while (has_data(in) && in.peek() != '\n') in.get();
199 }
200 }
201
202 line label(string s) {
203 line result;
204 result.words.push_back(word());
205 result.words.back().data = (s+":");
206 return result;
207 }
208
209
210 void parse_instruction_character_by_character(const string& line_data) {
211 vector<line> out;
212 parse_instruction_character_by_character(line_data, out);
213 }
214
215 void test_parse2_comment_token_in_middle() {
216 parse_instruction_character_by_character(
217 "a . z\n"
218 );
219 CHECK_TRACE_CONTENTS(
220 "parse2: word: a\n"
221 "parse2: word: z\n"
222 );
223 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
224
225 CHECK_TRACE_COUNT("parse2", 2);
226 }
227
228 void test_parse2_word_starting_with_dot() {
229 parse_instruction_character_by_character(
230 "a .b c\n"
231 );
232 CHECK_TRACE_CONTENTS(
233 "parse2: word: a\n"
234 "parse2: word: .b\n"
235 "parse2: word: c\n"
236 );
237 }
238
239 void test_parse2_comment_token_at_start() {
240 parse_instruction_character_by_character(
241 ". a b\n"
242 );
243 CHECK_TRACE_CONTENTS(
244 "parse2: word: a\n"
245 "parse2: word: b\n"
246 );
247 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
248 }
249
250 void test_parse2_comment_token_at_end() {
251 parse_instruction_character_by_character(
252 "a b .\n"
253 );
254 CHECK_TRACE_CONTENTS(
255 "parse2: word: a\n"
256 "parse2: word: b\n"
257 );
258 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
259 }
260
261 void test_parse2_word_starting_with_dot_at_start() {
262 parse_instruction_character_by_character(
263 ".a b c\n"
264 );
265 CHECK_TRACE_CONTENTS(
266 "parse2: word: .a\n"
267 "parse2: word: b\n"
268 "parse2: word: c\n"
269 );
270 }
271
272 void test_parse2_metadata() {
273 parse_instruction_character_by_character(
274 ".a b/c d\n"
275 );
276 CHECK_TRACE_CONTENTS(
277 "parse2: word: .a\n"
278 "parse2: word: b /c\n"
279 "parse2: word: d\n"
280 );
281 }
282
283 void test_parse2_string_with_metadata() {
284 parse_instruction_character_by_character(
285 "a \"bc def\"/disp32 g\n"
286 );
287 CHECK_TRACE_CONTENTS(
288 "parse2: word: a\n"
289 "parse2: word: \"bc def\" /disp32\n"
290 "parse2: word: g\n"
291 );
292 }
293
294 void test_parse2_string_with_metadata_at_end() {
295 parse_instruction_character_by_character(
296 "a \"bc def\"/disp32\n"
297 );
298 CHECK_TRACE_CONTENTS(
299 "parse2: word: a\n"
300 "parse2: word: \"bc def\" /disp32\n"
301 );
302 }
303
304 void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
305 parse_instruction_character_by_character(
306 "68/push \"test\"/f"
307 );
308 CHECK_TRACE_CONTENTS(
309 "parse2: word: 68 /push\n"
310 "parse2: word: \"test\" /f\n"
311 );
312 }
313
314
315
316
317 void test_parse2_string_containing_slashes() {
318 parse_instruction_character_by_character(
319 "a \"bc/def\"/disp32\n"
320 );
321 CHECK_TRACE_CONTENTS(
322 "parse2: word: \"bc/def\" /disp32\n"
323 );
324 }
325
326 void test_instruction_with_string_literal_with_escaped_quote() {
327 parse_instruction_character_by_character(
328 "\"a\\\"b\"\n" // escaped quote inside string
329 );
330 CHECK_TRACE_CONTENTS(
331 "parse2: word: \"a\"b\"\n"
332 );
333
334 CHECK_TRACE_COUNT("parse2", 1);
335 }
336
337 void test_instruction_with_string_literal_with_escaped_backslash() {
338 parse_instruction_character_by_character(
339 "\"a\\\\b\"\n"
340 );
341 CHECK_TRACE_CONTENTS(
342 "parse2: word: \"a\\b\"\n"
343 );
344
345 CHECK_TRACE_COUNT("parse2", 1);
346 }