https://github.com/akkartik/mu/blob/master/subx/038---literal_strings.cc
1
2
3
4
5
6 void test_transform_literal_string() {
7 run(
8 "== code\n"
9 "b8/copy \"test\"/imm32\n"
10 "== data\n"
11 );
12 CHECK_TRACE_CONTENTS(
13 "transform: -- move literal strings to data segment\n"
14 "transform: adding global variable '__subx_global_1' containing \"test\"\n"
15 "transform: instruction after transform: 'b8 __subx_global_1'\n"
16 );
17 }
18
19
20
21
22 :(after "Begin Transforms")
23
24 Transform.push_back(transform_literal_strings);
25
26
27 :(before "End Globals")
28 int Next_auto_global = 1;
29 :(code)
30 void transform_literal_strings(program& p) {
31 trace(3, "transform") << "-- move literal strings to data segment" << end();
32 if (p.segments.empty()) return;
33 segment& code = p.segments.at(0);
34 segment data;
35 for (int i = 0; i < SIZE(code.lines); ++i) {
36 line& inst = code.lines.at(i);
37 for (int j = 0; j < SIZE(inst.words); ++j) {
38 word& curr = inst.words.at(j);
39 if (curr.data.at(0) != '"') continue;
40 ostringstream global_name;
41 global_name << "__subx_global_" << Next_auto_global;
42 ++Next_auto_global;
43 add_global_to_data_segment(global_name.str(), curr, data);
44 curr.data = global_name.str();
45 }
46 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
47 }
48 if (data.lines.empty()) return;
49 if (SIZE(p.segments) < 2) {
50 p.segments.resize(2);
51 p.segments.at(1).lines.swap(data.lines);
52 }
53 vector<line>& existing_data = p.segments.at(1).lines;
54 existing_data.insert(existing_data.end(), data.lines.begin(), data.lines.end());
55 }
56
57 void add_global_to_data_segment(const string& name, const word& value, segment& data) {
58 trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
59
60 data.lines.push_back(label(name));
61
62 data.lines.push_back(line());
63 emit_hex_bytes(data.lines.back(), SIZE(value.data)-2, 4);
64
65 data.lines.push_back(line());
66 line& curr = data.lines.back();
67 for (int i = 1; i < SIZE(value.data)-1; ++i) {
68 char c = value.data.at(i);
69 curr.words.push_back(word());
70 curr.words.back().data = hex_byte_to_string(c);
71 curr.words.back().metadata.push_back(string(1, c));
72 }
73 }
74
75
76
77
78 void test_instruction_with_string_literal() {
79 parse_instruction_character_by_character(
80 "a \"abc def\" z\n"
81 );
82 CHECK_TRACE_CONTENTS(
83 "parse2: word: a\n"
84 "parse2: word: \"abc def\"\n"
85 "parse2: word: z\n"
86 );
87
88 CHECK_TRACE_COUNT("parse2", 3);
89 }
90
91 :(before "End Line Parsing Special-cases(line_data -> l)")
92 if (line_data.find('"') != string::npos) {
93 parse_instruction_character_by_character(line_data, l);
94 continue;
95 }
96
97 :(code)
98 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
99 if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) {
100 raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
101 return;
102 }
103
104 istringstream in(line_data);
105 in >> std::noskipws;
106 line result;
107
108 while (has_data(in)) {
109 skip_whitespace(in);
110 if (!has_data(in)) break;
111 char c = in.get();
112 if (c == '#') break;
113 if (c == ':') break;
114 if (c == '.') {
115 if (!has_data(in)) break;
116 if (isspace(in.peek()))
117 continue;
118 }
119 result.words.push_back(word());
120 if (c == '"') {
121
122 ostringstream d;
123 d << c;
124 while (has_data(in)) {
125 in >> c;
126 d << c;
127 if (c == '"') break;
128 }
129 result.words.back().data = d.str();
130
131 ostringstream m;
132 while (!isspace(in.peek()) && has_data(in)) {
133 in >> c;
134 if (c == '/') {
135 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
136 m.str("");
137 }
138 else {
139 m << c;
140 }
141 }
142 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
143 }
144 else {
145
146 ostringstream w;
147 w << c;
148 while (!isspace(in.peek()) && has_data(in)) {
149 in >> c;
150 w << c;
151 }
152 parse_word(w.str(), result.words.back());
153 }
154 trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
155 }
156 if (!result.words.empty())
157 out.push_back(result);
158 }
159
160 void skip_whitespace(istream& in) {
161 while (true) {
162 if (has_data(in) && isspace(in.peek())) in.get();
163 else break;
164 }
165 }
166
167 void skip_comment(istream& in) {
168 if (has_data(in) && in.peek() == '#') {
169 in.get();
170 while (has_data(in) && in.peek() != '\n') in.get();
171 }
172 }
173
174
175 void parse_instruction_character_by_character(const string& line_data) {
176 vector<line> out;
177 parse_instruction_character_by_character(line_data, out);
178 }
179
180 void test_parse2_comment_token_in_middle() {
181 parse_instruction_character_by_character(
182 "a . z\n"
183 );
184 CHECK_TRACE_CONTENTS(
185 "parse2: word: a\n"
186 "parse2: word: z\n"
187 );
188 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
189
190 CHECK_TRACE_COUNT("parse2", 2);
191 }
192
193 void test_parse2_word_starting_with_dot() {
194 parse_instruction_character_by_character(
195 "a .b c\n"
196 );
197 CHECK_TRACE_CONTENTS(
198 "parse2: word: a\n"
199 "parse2: word: .b\n"
200 "parse2: word: c\n"
201 );
202 }
203
204 void test_parse2_comment_token_at_start() {
205 parse_instruction_character_by_character(
206 ". a b\n"
207 );
208 CHECK_TRACE_CONTENTS(
209 "parse2: word: a\n"
210 "parse2: word: b\n"
211 );
212 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
213 }
214
215 void test_parse2_comment_token_at_end() {
216 parse_instruction_character_by_character(
217 "a b .\n"
218 );
219 CHECK_TRACE_CONTENTS(
220 "parse2: word: a\n"
221 "parse2: word: b\n"
222 );
223 CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
224 }
225
226 void test_parse2_word_starting_with_dot_at_start() {
227 parse_instruction_character_by_character(
228 ".a b c\n"
229 );
230 CHECK_TRACE_CONTENTS(
231 "parse2: word: .a\n"
232 "parse2: word: b\n"
233 "parse2: word: c\n"
234 );
235 }
236
237 void test_parse2_metadata() {
238 parse_instruction_character_by_character(
239 ".a b/c d\n"
240 );
241 CHECK_TRACE_CONTENTS(
242 "parse2: word: .a\n"
243 "parse2: word: b /c\n"
244 "parse2: word: d\n"
245 );
246 }
247
248 void test_parse2_string_with_metadata() {
249 parse_instruction_character_by_character(
250 "a \"bc def\"/disp32 g\n"
251 );
252 CHECK_TRACE_CONTENTS(
253 "parse2: word: a\n"
254 "parse2: word: \"bc def\" /disp32\n"
255 "parse2: word: g\n"
256 );
257 }
258
259 void test_parse2_string_with_metadata_at_end() {
260 parse_instruction_character_by_character(
261 "a \"bc def\"/disp32\n"
262 );
263 CHECK_TRACE_CONTENTS(
264 "parse2: word: a\n"
265 "parse2: word: \"bc def\" /disp32\n"
266 );
267 }
268
269 void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
270 parse_instruction_character_by_character(
271 "68/push \"test\"/f"
272 );
273 CHECK_TRACE_CONTENTS(
274 "parse2: word: 68 /push\n"
275 "parse2: word: \"test\" /f\n"
276 );
277 }
278
279
280
281
282 void test_parse2_string_containing_slashes() {
283 parse_instruction_character_by_character(
284 "a \"bc/def\"/disp32\n"
285 );
286 CHECK_TRACE_CONTENTS(
287 "parse2: word: \"bc/def\" /disp32\n"
288 );
289 }