Mu - subx/038---literal

  1 //: Allow instructions to mention literals directly.
  2 //:
  3 //: This layer will transparently move them to the global segment (assumed to
  4 //: always be the second segment).
  5 
  6 :(scenario transform_literal_string)
  7 % Mem_offset = CODE_START;
  8 % Mem.resize(AFTER_STACK - CODE_START);
  9 == code
 10   b8/copy "test"/imm32  # copy to EAX
 11 +transform: -- move literal strings to data segment
 12 +transform: adding global variable '__subx_global_1' containing "test"
 13 +transform: instruction after transform: 'b8 __subx_global_1'
 14 
 15 //: We don't rely on any transforms running in previous layers, but this layer
 16 //: knows about labels and global variables and will emit them for previous
 17 //: layers to transform.
 18 :(after "Begin Transforms")
 19 // Begin Level-3 Transforms
 20 Transform.push_back(transform_literal_strings);
 21 // End Level-3 Transforms
 22 
 23 :(before "End Globals")
 24 int Next_auto_global = 1;
 25 :(code)
 26 void transform_literal_strings(program& p) {
 27   trace(99, "transform") << "-- move literal strings to data segment" << end();
 28   if (p.segments.empty()) return;
 29   segment& code = p.segments.at(0);
 30   segment data;
 31   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 32     line& inst = code.lines.at(i);
 33     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 34       word& curr = inst.words.at(j);
 35       if (curr.data.at(0) != '"') continue;
 36       ostringstream global_name;
 37       global_name << "__subx_global_" << Next_auto_global;
 38       ++Next_auto_global;
 39       add_global_to_data_segment(global_name.str(), curr, data);
 40       curr.data = global_name.str();
 41     }
 42     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
 43   }
 44   if (data.lines.empty()) return;
 45   if (SIZE(p.segments) < 2) {
 46     p.segments.resize(2);
 47     p.segments.at(1).lines.swap(data.lines);
 48   }
 49   vector<line>& existing_data = p.segments.at(1).lines;
 50   existing_data.insert(existing_data.end(), data.lines.begin(), data.lines.end());
 51 }
 52 
 53 void add_global_to_data_segment(const string& name, const word& value, segment& data) {
 54   trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
 55   // emit label
 56   data.lines.push_back(label(name));
 57   // emit size for size-prefixed array
 58   data.lines.push_back(line());
 59   emit_hex_bytes(data.lines.back(), SIZE(value.data)-/*skip quotes*/2, 4/*bytes*/);
 60   // emit data byte by byte
 61   data.lines.push_back(line());
 62   line& curr = data.lines.back();
 63   for (int i = /*skip start quote*/1;  i < SIZE(value.data)-/*skip end quote*/1;  ++i) {
 64     char c = value.data.at(i);
 65     curr.words.push_back(word());
 66     curr.words.back().data = hex_byte_to_string(c);
 67     curr.words.back().metadata.push_back(string(1, c));
 68   }
 69 }
 70 
 71 line label(string s) {
 72   line result;
 73   result.words.push_back(word());
 74   result.words.back().data = (s+":");
 75   return result;
 76 }
 77 
 78 //: Within strings, whitespace is significant. So we need to redo our instruction
 79 //: parsing.
 80 
 81 :(scenarios parse_instruction_character_by_character)
 82 :(scenario instruction_with_string_literal)
 83 a "abc  def" z  # two spaces inside string
 84 +parse2: word: a
 85 +parse2: word: "abc  def"
 86 +parse2: word: z
 87 # no other words
 88 $parse2: 3
 89 
 90 :(before "End Line Parsing Special-cases(line_data -> l)")
 91 if (line_data.find('"') != string::npos) {  // can cause false-positives, but we can handle them
 92   parse_instruction_character_by_character(line_data, l);
 93   continue;
 94 }
 95 
 96 :(code)
 97 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
 98   // parse literals
 99   istringstream in(line_data);
100   in >> std::noskipws;
101   line result;
102   // add tokens (words or strings) one by one
103   while (has_data(in)) {
104     skip_whitespace(in);
105     if (!has_data(in)) break;
106     char c = in.get();
107     if (c == '#') break;  // comment; drop rest of line
108     if (c == ':') break;  // line metadata; skip for now
109     if (c == '.') {
110       if (!has_data(in)) break;  // comment token at end of line
111       if (isspace(in.peek()))
112         continue;  // '.' followed by space is comment token; skip
113     }
114     ostringstream w;
115     w << c;
116     if (c == '"') {
117       // slurp until '"'
118       while (has_data(in)) {
119         in >> c;
120         w << c;
121         if (c == '"') break;
122       }
123     }
124     // slurp any remaining characters until whitespace
125     while (!isspace(in.peek()) && has_data(in)) {  // peek can sometimes trigger eof(), so do it first
126       in >> c;
127       w << c;
128     }
129     result.words.push_back(word());
130     parse_word(w.str(), result.words.back());
131     trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
132   }
133   if (!result.words.empty())
134     out.push_back(result);
135 }
136 
137 void skip_whitespace(istream& in) {
138   while (true) {
139     if (has_data(in) && isspace(in.peek())) in.get();
140     else break;
141   }
142 }
143 
144 void skip_comment(istream& in) {
145   if (has_data(in) && in.peek() == '#') {
146     in.get();
147     while (has_data(in) && in.peek() != '\n') in.get();
148   }
149 }
150 
151 // helper for tests
152 void parse_instruction_character_by_character(const string& line_data) {
153   vector<line> out;
154   parse_instruction_character_by_character(line_data, out);
155 }
156 
157 :(scenario parse2_comment_token_in_middle)
158 a . z
159 +parse2: word: a
160 +parse2: word: z
161 -parse2: word: .
162 # no other words
163 $parse2: 2
164 
165 :(scenario parse2_word_starting_with_dot)
166 a .b c
167 +parse2: word: a
168 +parse2: word: .b
169 +parse2: word: c
170 
171 :(scenario parse2_comment_token_at_start)
172 . a b
173 +parse2: word: a
174 +parse2: word: b
175 -parse2: word: .
176 
177 :(scenario parse2_comment_token_at_end)
178 a b .
179 +parse2: word: a
180 +parse2: word: b
181 -parse2: word: .
182 
183 :(scenario parse2_word_starting_with_dot_at_start)
184 .a b c
185 +parse2: word: .a
186 +parse2: word: b
187 +parse2: word: c
188 
189 :(scenario parse2_metadata)
190 .a b/c d
191 +parse2: word: .a
192 +parse2: word: b /c
193 +parse2: word: d
194 
195 :(scenario parse2_string_with_metadata)
196 a "bc  def"/disp32 g
197 +parse2: word: a
198 +parse2: word: "bc  def" /disp32
199 +parse2: word: g
200 
201 :(scenario parse2_string_with_metadata_at_end)
202 a "bc  def"/disp32
203 +parse2: word: a
204 +parse2: word: "bc  def" /disp32
205 
206 :(code)
207 void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
208   parse_instruction_character_by_character(
209       "68/push \"test\"/f"  // no newline, which is how calls from parse() will look
210   );
211   CHECK_TRACE_CONTENTS(
212       "parse2: word: 68 /push^D"
213       "parse2: word: \"test\" /f^D"
214   );
215 }