https://github.com/akkartik/mu/blob/master/subx/038---literal_strings.cc
1
2
3
4
5
6 void test_transform_literal_string() {
7 run(
8 "== code 0x1\n"
9 "b8/copy \"test\"/imm32\n"
10 "== data 0x2000\n"
11 );
12 CHECK_TRACE_CONTENTS(
13 "transform: -- move literal strings to data segment\n"
14 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
15 "transform: instruction after transform: 'b8 __subx_global_1'\n"
16 );
17 }
18
19
20
21
22 :(after "Begin Transforms")
23
24 Transform.push_back(transform_literal_strings);
25
26
27 :(before "End Globals")
28 int Next_auto_global = 1;
29 :(code)
30 void transform_literal_strings(program& p) {
31 trace(3, "transform") << "-- move literal strings to data segment" << end();
32 if (p.segments.empty()) return;
33 segment& code = *find(p, "code");
34 segment& data = *find(p, "data");
35 for (int i = 0; i < SIZE(code.lines); ++i) {
36 line& inst = code.lines.at(i);
37 for (int j = 0; j < SIZE(inst.words); ++j) {
38 word& curr = inst.words.at(j);
39 if (curr.data.at(0) != '"') continue;
40 ostringstream global_name;
41 global_name << "__subx_global_" << Next_auto_global;
42 ++Next_auto_global;
43 add_global_to_data_segment(global_name.str(), curr, data);
44 curr.data = global_name.str();
45 }
46 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
47 }
48 }
49
50 void add_global_to_data_segment(const string& name, const word& value, segment& data) {
51 trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
52
53 data.lines.push_back(label(name));
54
55 data.lines.push_back(line());
56 emit_hex_bytes(data.lines.back(), SIZE(value.data)-2, 4);
57
58 data.lines.push_back(line());
59 line& curr = data.lines.back();
60 for (int i = 1; i < SIZE(value.data)-1; ++i) {
61 char c = value.data.at(i);
62 curr.words.push_back(word());
63 curr.words.back().data = hex_byte_to_string(c);
64 curr.words.back().metadata.push_back(string(1, c));
65 }
66 }
67
68
69
70
71 void test_instruction_with_string_literal() {
72 parse_instruction_character_by_character(
73 "a \"abc def\" z\n"
74 );
75 CHECK_TRACE_CONTENTS(
76 "parse2: word: a\n"
77 "parse2: word: \"abc def\"\n"
78 "parse2: word: z\n"
79 );
80
81 CHECK_TRACE_COUNT("parse2", 3);
82 }
83
84 :(before "End Line Parsing Special-cases(line_data -> l)")
85 if (line_data.find('"') != string::npos) {
86 parse_instruction_character_by_character(line_data, l);
87 continue;
88 }
89
90 :(code)
91 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
92 if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) {
93 raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
94 return;
95 }
96
97 istringstream in(line_data);
98 in >> std::noskipws;
99 line result;
100 result.original = line_data;
101
102 while (has_data(in)) {
103 skip_whitespace(in);
104 if (!has_data(in)) break;
105 char c = in.get();
106 if (c == '#') break;
107 if (c == ':') break;
108 if (c == '.') {
109 if (!has_data(in)) break;
110 if (isspace(in.peek()))
111 continue;
112 }
113 result.words.push_back(word());
114 if (c == '"') {
115
116 ostringstream d;
117 d << c;
118 while (has_data(in)) {
119 in >> c;
120 if (c == '\\') {
121 in >> c;
122 if (c == 'n') d << '\n';
123 else if (c == '"') d << '"';
124 else if (c == '\\') d << '\\';
125 else {
126 raise << "parse_instruction_character_by_character: unknown escape sequence '\\" << c << "'\n" << end();
127 return;
128 }
129 continue;
130 } else {
131 d << c;
132 }
133 if (c == '"') break;
134 }
135 result.words.back().data = d.str();
136
137 ostringstream m;
138 while (!isspace(in.peek()) && has_data(in)) {
139 in >> c;
140 if (c == '/') {
141 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
142 m.str("");
143 }
144 else {
145 m << c;
146 }
147 }
148 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
149 }
150 else {
151
152 ostringstream w;
153 w << c;
154 while (!isspace(in.peek()) && has_data(in)) {
155 in >> c;
156 w << c;
157 }
158 parse_word(w.str(), result.words.back());
159 }
160 trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
161 }
162 if (!result.words.empty())
163 out.push_back(result);
164 }
165
166 void skip_whitespace(istream& in) {
167 while (true) {
168 if (has_data(in) && isspace(in.peek())) in.get();
169 else break;
170 }
171 }
172
173 void skip_comment(istream& in) {
174 if (has_data(in) && in.peek() == '#') {
175 in.get();
176 while (has_data(in) && in.peek() != '\n') in.get();
177 }
178 }
179
180 line label(string s) {
181 line result;
182 result.words.push_back(word());
183 result.words.back().data = (s+":");
184 return result;
185 }
186
187
188 void parse_instruction_character_by_character(const string& line_data) {
189 vector<line> out;
190 parse_instruction_character_by_character(line_data, out);
191 }
192
193 void test_parse2_comment_token_in_middle() {
194 parse_instruction_character_by_character(
195 "a . z\n"
196 );
197 CHECK_TRACE_CONTENTS(
198 "parse2: word: a\n"
199 "parse2: word: z\n"
200 );
201 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
202
203 CHECK_TRACE_COUNT("parse2", 2);
204 }
205
206 void test_parse2_word_starting_with_dot() {
207 parse_instruction_character_by_character(
208 "a .b c\n"
209 );
210 CHECK_TRACE_CONTENTS(
211 "parse2: word: a\n"
212 "parse2: word: .b\n"
213 "parse2: word: c\n"
214 );
215 }
216
217 void test_parse2_comment_token_at_start() {
218 parse_instruction_character_by_character(
219 ". a b\n"
220 );
221 CHECK_TRACE_CONTENTS(
222 "parse2: word: a\n"
223 "parse2: word: b\n"
224 );
225 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
226 }
227
228 void test_parse2_comment_token_at_end() {
229 parse_instruction_character_by_character(
230 "a b .\n"
231 );
232 CHECK_TRACE_CONTENTS(
233 "parse2: word: a\n"
234 "parse2: word: b\n"
235 );
236 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
237 }
238
239 void test_parse2_word_starting_with_dot_at_start() {
240 parse_instruction_character_by_character(
241 ".a b c\n"
242 );
243 CHECK_TRACE_CONTENTS(
244 "parse2: word: .a\n"
245 "parse2: word: b\n"
246 "parse2: word: c\n"
247 );
248 }
249
250 void test_parse2_metadata() {
251 parse_instruction_character_by_character(
252 ".a b/c d\n"
253 );
254 CHECK_TRACE_CONTENTS(
255 "parse2: word: .a\n"
256 "parse2: word: b /c\n"
257 "parse2: word: d\n"
258 );
259 }
260
261 void test_parse2_string_with_metadata() {
262 parse_instruction_character_by_character(
263 "a \"bc def\"/disp32 g\n"
264 );
265 CHECK_TRACE_CONTENTS(
266 "parse2: word: a\n"
267 "parse2: word: \"bc def\" /disp32\n"
268 "parse2: word: g\n"
269 );
270 }
271
272 void test_parse2_string_with_metadata_at_end() {
273 parse_instruction_character_by_character(
274 "a \"bc def\"/disp32\n"
275 );
276 CHECK_TRACE_CONTENTS(
277 "parse2: word: a\n"
278 "parse2: word: \"bc def\" /disp32\n"
279 );
280 }
281
282 void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
283 parse_instruction_character_by_character(
284 "68/push \"test\"/f"
285 );
286 CHECK_TRACE_CONTENTS(
287 "parse2: word: 68 /push\n"
288 "parse2: word: \"test\" /f\n"
289 );
290 }
291
292
293
294
295 void test_parse2_string_containing_slashes() {
296 parse_instruction_character_by_character(
297 "a \"bc/def\"/disp32\n"
298 );
299 CHECK_TRACE_CONTENTS(
300 "parse2: word: \"bc/def\" /disp32\n"
301 );
302 }
303
304 void test_instruction_with_string_literal_with_escaped_quote() {
305 parse_instruction_character_by_character(
306 "\"a\\\"b\"\n" // escaped quote inside string
307 );
308 CHECK_TRACE_CONTENTS(
309 "parse2: word: \"a\"b\"\n"
310 );
311
312 CHECK_TRACE_COUNT("parse2", 1);
313 }
314
315 void test_instruction_with_string_literal_with_escaped_backslash() {
316 parse_instruction_character_by_character(
317 "\"a\\\\b\"\n"
318 );
319 CHECK_TRACE_CONTENTS(
320 "parse2: word: \"a\\b\"\n"
321 );
322
323 CHECK_TRACE_COUNT("parse2", 1);
324 }