https://github.com/akkartik/mu/blob/master/subx/038---literal_strings.cc
1
2
3
4
5
6 :(scenario transform_literal_string)
7 == code
8 b8/copy "test"/imm32
9 == data
10 +transform: -- move literal strings to data segment
11 +transform: adding global variable '__subx_global_1' containing "test"
12 +transform: instruction after transform: 'b8 __subx_global_1'
13
14
15
16
17 :(after "Begin Transforms")
18
// Append transform_literal_strings to the Transform list at the
// "Begin Transforms" waypoint named by the directive above.
19 Transform.push_back(transform_literal_strings);
20
21
22 :(before "End Globals")
// Numeric suffix for the next auto-generated global name ('__subx_global_<N>').
// transform_literal_strings increments it once per string literal it moves.
23 int Next_auto_global = 1;
24 :(code)
25 void transform_literal_strings(program& p) {
26 trace(99, "transform") << "-- move literal strings to data segment" << end();
27 if (p.segments.empty()) return;
28 segment& code = p.segments.at(0);
29 segment data;
30 for (int i = 0; i < SIZE(code.lines); ++i) {
31 line& inst = code.lines.at(i);
32 for (int j = 0; j < SIZE(inst.words); ++j) {
33 word& curr = inst.words.at(j);
34 if (curr.data.at(0) != '"') continue;
35 ostringstream global_name;
36 global_name << "__subx_global_" << Next_auto_global;
37 ++Next_auto_global;
38 add_global_to_data_segment(global_name.str(), curr, data);
39 curr.data = global_name.str();
40 }
41 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
42 }
43 if (data.lines.empty()) return;
44 if (SIZE(p.segments) < 2) {
45 p.segments.resize(2);
46 p.segments.at(1).lines.swap(data.lines);
47 }
48 vector<line>& existing_data = p.segments.at(1).lines;
49 existing_data.insert(existing_data.end(), data.lines.begin(), data.lines.end());
50 }
51
52 void add_global_to_data_segment(const string& name, const word& value, segment& data) {
53 trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
54
55 data.lines.push_back(label(name));
56
57 data.lines.push_back(line());
58 emit_hex_bytes(data.lines.back(), SIZE(value.data)-2, 4);
59
60 data.lines.push_back(line());
61 line& curr = data.lines.back();
62 for (int i = 1; i < SIZE(value.data)-1; ++i) {
63 char c = value.data.at(i);
64 curr.words.push_back(word());
65 curr.words.back().data = hex_byte_to_string(c);
66 curr.words.back().metadata.push_back(string(1, c));
67 }
68 }
69
70 line label(string s) {
71 line result;
72 result.words.push_back(word());
73 result.words.back().data = (s+":");
74 return result;
75 }
76
77
78
79
80 :(scenarios parse_instruction_character_by_character)
81 :(scenario instruction_with_string_literal)
82 a "abc def" z
83 +parse2: word: a
84 +parse2: word: "abc def"
85 +parse2: word: z
86
87 $parse2: 3
88
89 :(before "End Line Parsing Special-cases(line_data -> l)")
// A line containing a double quote may hold a string literal, so it is parsed
// by parse_instruction_character_by_character(), which keeps a quoted literal
// together as one word; 'continue' then moves on past the remaining code for
// this line.
90 if (line_data.find('"') != string::npos) {
91 parse_instruction_character_by_character(line_data, l);
92 continue;
93 }
94
95 :(code)
96 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
97 if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) {
98 raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
99 return;
100 }
101
102 istringstream in(line_data);
103 in >> std::noskipws;
104 line result;
105
106 while (has_data(in)) {
107 skip_whitespace(in);
108 if (!has_data(in)) break;
109 char c = in.get();
110 if (c == '#') break;
111 if (c == ':') break;
112 if (c == '.') {
113 if (!has_data(in)) break;
114 if (isspace(in.peek()))
115 continue;
116 }
117 result.words.push_back(word());
118 if (c == '"') {
119
120 ostringstream d;
121 d << c;
122 while (has_data(in)) {
123 in >> c;
124 d << c;
125 if (c == '"') break;
126 }
127 result.words.back().data = d.str();
128
129 ostringstream m;
130 while (!isspace(in.peek()) && has_data(in)) {
131 in >> c;
132 if (c == '/') {
133 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
134 m.str("");
135 }
136 else {
137 m << c;
138 }
139 }
140 if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
141 }
142 else {
143
144 ostringstream w;
145 w << c;
146 while (!isspace(in.peek()) && has_data(in)) {
147 in >> c;
148 w << c;
149 }
150 parse_word(w.str(), result.words.back());
151 }
152 trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
153 }
154 if (!result.words.empty())
155 out.push_back(result);
156 }
157
158 void skip_whitespace(istream& in) {
159 while (true) {
160 if (has_data(in) && isspace(in.peek())) in.get();
161 else break;
162 }
163 }
164
165 void skip_comment(istream& in) {
166 if (has_data(in) && in.peek() == '#') {
167 in.get();
168 while (has_data(in) && in.peek() != '\n') in.get();
169 }
170 }
171
172
173 void parse_instruction_character_by_character(const string& line_data) {
174 vector<line> out;
175 parse_instruction_character_by_character(line_data, out);
176 }
177
178 :(scenario parse2_comment_token_in_middle)
179 a . z
180 +parse2: word: a
181 +parse2: word: z
182 -parse2: word: .
183
184 $parse2: 2
185
186 :(scenario parse2_word_starting_with_dot)
187 a .b c
188 +parse2: word: a
189 +parse2: word: .b
190 +parse2: word: c
191
192 :(scenario parse2_comment_token_at_start)
193 . a b
194 +parse2: word: a
195 +parse2: word: b
196 -parse2: word: .
197
198 :(scenario parse2_comment_token_at_end)
199 a b .
200 +parse2: word: a
201 +parse2: word: b
202 -parse2: word: .
203
204 :(scenario parse2_word_starting_with_dot_at_start)
205 .a b c
206 +parse2: word: .a
207 +parse2: word: b
208 +parse2: word: c
209
210 :(scenario parse2_metadata)
211 .a b/c d
212 +parse2: word: .a
213 +parse2: word: b /c
214 +parse2: word: d
215
216 :(scenario parse2_string_with_metadata)
217 a "bc def"/disp32 g
218 +parse2: word: a
219 +parse2: word: "bc def" /disp32
220 +parse2: word: g
221
222 :(scenario parse2_string_with_metadata_at_end)
223 a "bc def"/disp32
224 +parse2: word: a
225 +parse2: word: "bc def" /disp32
226
227 :(code)
228 void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
229 parse_instruction_character_by_character(
230 "68/push \"test\"/f"
231 );
232 CHECK_TRACE_CONTENTS(
233 "parse2: word: 68 /push^D"
234 "parse2: word: \"test\" /f^D"
235 );
236 }
237
238
239
240 :(scenario parse2_string_containing_slashes)
241 a "bc/def"/disp32
242 +parse2: word: "bc/def" /disp32