https://github.com/akkartik/mu/blob/master/038---literal_strings.cc
1
2
3
4
5
6 void test_transform_literal_string() {
7 run(
8 "== code 0x1\n"
9 "b8/copy \"test\"/imm32\n"
10 "== data 0x2000\n"
11 );
12 CHECK_TRACE_CONTENTS(
13 "transform: -- move literal strings to data segment\n"
14 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
15 "transform: line after transform: 'b8 __subx_global_1'\n"
16 );
17 }
18
19
20
21
22 :(after "Begin Transforms")
23
// Append transform_literal_strings to the Transform list.
// NOTE(review): presumably the directive above splices this statement in right
// after the "Begin Transforms" marker defined elsewhere -- confirm against the
// build tooling.
24 Transform.push_back(transform_literal_strings);
25
26
27 :(before "End Globals")
// Number given to the next auto-generated global; transform_literal_strings
// uses it to build names of the form "__subx_global_<n>" and then increments it.
28 int Next_auto_global = 1;
29 :(before "End Reset")
// Put the counter back to its initial value.
// NOTE(review): presumably this hunk runs as part of a per-test reset so that
// generated names start from 1 again -- confirm where "End Reset" lives.
30 Next_auto_global = 1;
31 :(code)
32 void transform_literal_strings(program& p) {
33 trace(3, "transform") << "-- move literal strings to data segment" << end();
34 if (p.segments.empty()) return;
35 vector<line> new_lines;
36 for (int s = 0; s < SIZE(p.segments); ++s) {
37 segment& seg = p.segments.at(s);
38 trace(99, "transform") << "segment '" << seg.name << "'" << end();
39 for (int i = 0; i < SIZE(seg.lines); ++i) {
40
41 line& line = seg.lines.at(i);
42 for (int j = 0; j < SIZE(line.words); ++j) {
43 word& curr = line.words.at(j);
44 if (curr.data.at(0) != '"') continue;
45 ostringstream global_name;
46 global_name << "__subx_global_" << Next_auto_global;
47 ++Next_auto_global;
48 add_global_to_data_segment(global_name.str(), curr, new_lines);
49 curr.data = global_name.str();
50 }
51 trace(99, "transform") << "line after transform: '" << data_to_string(line) << "'" << end();
52 }
53 }
54 segment* data = find(p, "data");
55 if (data)
56 data->lines.insert(data->lines.end(), new_lines.begin(), new_lines.end());
57 }
58
59 void add_global_to_data_segment(const string& name, const word& value, vector<line>& out) {
60 trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
61
62 out.push_back(label(name));
63
64 out.push_back(line());
65 emit_hex_bytes(out.back(), SIZE(value.data)-2, 4);
66
67 out.push_back(line());
68 line& curr = out.back();
69 for (int i = 1; i < SIZE(value.data)-1; ++i) {
70 char c = value.data.at(i);
71 curr.words.push_back(word());
72 curr.words.back().data = hex_byte_to_string(c);
73 curr.words.back().metadata.push_back(string(1, c));
74 }
75 }
76
77
78
79
80 void test_instruction_with_string_literal() {
81 parse_instruction_character_by_character(
82 "a \"abc def\" z\n"
83 );
84 CHECK_TRACE_CONTENTS(
85 "parse2: word: a\n"
86 "parse2: word: \"abc def\"\n"
87 "parse2: word: z\n"
88 );
89
90 CHECK_TRACE_COUNT("parse2", 3);
91 }
92
93 void test_string_literal_in_data_segment() {
94 run(
95 "== code 0x1\n"
96 "b8/copy X/imm32\n"
97 "== data 0x2000\n"
98 "X:\n"
99 "\"test\"/imm32\n"
100 );
101 CHECK_TRACE_CONTENTS(
102 "transform: -- move literal strings to data segment\n"
103 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
104 "transform: line after transform: '__subx_global_1'\n"
105 );
106 }
107
108 :(before "End Line Parsing Special-cases(line_data -> l)")
// Any line containing a double quote is handed to the character-by-character
// parser, which keeps a quoted string together as a single word and appends
// the parsed line to l.
109 if (line_data.find('"') != string::npos) {
110 parse_instruction_character_by_character(line_data, l);
// this line is fully handled; go on to the next iteration of the enclosing
// loop (the loop itself is outside this file's view)
111 continue;
112 }
113
114 :(code)
115 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
116 if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) {
117 raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
118 return;
119 }
120
121 istringstream in(line_data);
122 in >> std::noskipws;
123 line result;
124 result.original = line_data;
125
126 while (has_data(in)) {
127 skip_whitespace(in);
128 if (!has_data(in)) break;
129 char c = in.get();
130 if (c == '#') break;
131 if (c == ':') break;
132 if (c == '.') {
133 if (!has_data(in)) break;
134 if (isspace(in.peek()))
135 continue;
136 }
137 result.words.push_back(word());
138 if (c == '"') {
139
140 ostringstream d;
141 d << c;
142 while (has_data(in)) {
143 in >> c;
144 if (c == '\\') {
145 in >> c;
146 if (c == 'n') d << '\n';
147 else if (c == '"') d << '"';
148 else if (c == '\\') d << '\\';
149 else {
150 raise << "parse_instruction_character_by_character: unknown escape sequence '\\" << c << "'\n" << end();
151 return;
152 }
153 continue;
154 } else {
155 d << c;
156 }
157 if (c == '"') break;
158 }
159 result.words.back().data = d.str();
160 result.words.back().original = d.str();
161
162 ostringstream m;
163 while (!isspace(in.peek()) && has_data(in)) {
164 in >> c;
165 if (c == '/') {
166 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
167 m.str("");
168 }
169 else {
170 m << c;
171 }
172 }
173 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
174 }
175 else {
176
177 ostringstream w;
178 w << c;
179 while (!isspace(in.peek()) && has_data(in)) {
180 in >> c;
181 w << c;
182 }
183 parse_word(w.str(), result.words.back());
184 }
185 trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
186 }
187 if (!result.words.empty())
188 out.push_back(result);
189 }
190
191 void skip_whitespace(istream& in) {
192 while (has_data(in) && isspace(in.peek())) {
193 in.get();
194 }
195 }
196
197 void skip_comment(istream& in) {
198 if (has_data(in) && in.peek() == '#') {
199 in.get();
200 while (has_data(in) && in.peek() != '\n') in.get();
201 }
202 }
203
204 line label(string s) {
205 line result;
206 result.words.push_back(word());
207 result.words.back().data = (s+":");
208 return result;
209 }
210
211
212 void parse_instruction_character_by_character(const string& line_data) {
213 vector<line> out;
214 parse_instruction_character_by_character(line_data, out);
215 }
216
217 void test_parse2_comment_token_in_middle() {
218 parse_instruction_character_by_character(
219 "a . z\n"
220 );
221 CHECK_TRACE_CONTENTS(
222 "parse2: word: a\n"
223 "parse2: word: z\n"
224 );
225 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
226
227 CHECK_TRACE_COUNT("parse2", 2);
228 }
229
230 void test_parse2_word_starting_with_dot() {
231 parse_instruction_character_by_character(
232 "a .b c\n"
233 );
234 CHECK_TRACE_CONTENTS(
235 "parse2: word: a\n"
236 "parse2: word: .b\n"
237 "parse2: word: c\n"
238 );
239 }
240
241 void test_parse2_comment_token_at_start() {
242 parse_instruction_character_by_character(
243 ". a b\n"
244 );
245 CHECK_TRACE_CONTENTS(
246 "parse2: word: a\n"
247 "parse2: word: b\n"
248 );
249 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
250 }
251
252 void test_parse2_comment_token_at_end() {
253 parse_instruction_character_by_character(
254 "a b .\n"
255 );
256 CHECK_TRACE_CONTENTS(
257 "parse2: word: a\n"
258 "parse2: word: b\n"
259 );
260 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
261 }
262
263 void test_parse2_word_starting_with_dot_at_start() {
264 parse_instruction_character_by_character(
265 ".a b c\n"
266 );
267 CHECK_TRACE_CONTENTS(
268 "parse2: word: .a\n"
269 "parse2: word: b\n"
270 "parse2: word: c\n"
271 );
272 }
273
274 void test_parse2_metadata() {
275 parse_instruction_character_by_character(
276 ".a b/c d\n"
277 );
278 CHECK_TRACE_CONTENTS(
279 "parse2: word: .a\n"
280 "parse2: word: b /c\n"
281 "parse2: word: d\n"
282 );
283 }
284
285 void test_parse2_string_with_metadata() {
286 parse_instruction_character_by_character(
287 "a \"bc def\"/disp32 g\n"
288 );
289 CHECK_TRACE_CONTENTS(
290 "parse2: word: a\n"
291 "parse2: word: \"bc def\" /disp32\n"
292 "parse2: word: g\n"
293 );
294 }
295
296 void test_parse2_string_with_metadata_at_end() {
297 parse_instruction_character_by_character(
298 "a \"bc def\"/disp32\n"
299 );
300 CHECK_TRACE_CONTENTS(
301 "parse2: word: a\n"
302 "parse2: word: \"bc def\" /disp32\n"
303 );
304 }
305
306 void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
307 parse_instruction_character_by_character(
308 "68/push \"test\"/f"
309 );
310 CHECK_TRACE_CONTENTS(
311 "parse2: word: 68 /push\n"
312 "parse2: word: \"test\" /f\n"
313 );
314 }
315
316
317
318
319 void test_parse2_string_containing_slashes() {
320 parse_instruction_character_by_character(
321 "a \"bc/def\"/disp32\n"
322 );
323 CHECK_TRACE_CONTENTS(
324 "parse2: word: \"bc/def\" /disp32\n"
325 );
326 }
327
328 void test_instruction_with_string_literal_with_escaped_quote() {
329 parse_instruction_character_by_character(
330 "\"a\\\"b\"\n" // escaped quote inside string
331 );
332 CHECK_TRACE_CONTENTS(
333 "parse2: word: \"a\"b\"\n"
334 );
335
336 CHECK_TRACE_COUNT("parse2", 1);
337 }
338
339 void test_instruction_with_string_literal_with_escaped_backslash() {
340 parse_instruction_character_by_character(
341 "\"a\\\\b\"\n"
342 );
343 CHECK_TRACE_CONTENTS(
344 "parse2: word: \"a\\b\"\n"
345 );
346
347 CHECK_TRACE_COUNT("parse2", 1);
348 }