//: Allow instructions to mention literals directly. //: //: This layer will transparently move them to the global segment (assumed to //: always be the second segment). void test_transform_literal_string() { run( "== code 0x1\n" "b8/copy \"test\"/imm32\n" "== data 0x2000\n" // need an empty segment ); CHECK_TRACE_CONTENTS( "transform: -- move literal strings to data segment\n" "transform: adding global variable '__subx_global_1' containing \"test\"\n" "transform: line after transform: 'b8 __subx_global_1'\n" ); } //: We don't rely on any transforms running in previous layers, but this layer //: knows about labels and global variables and will emit them for previous //: layers to transform. :(after "Begin Transforms") Transform.push_back(transform_literal_strings); :(before "End Globals") int Next_auto_global = 1; :(before "End Reset") Next_auto_global = 1; :(code) void transform_literal_strings(program& p) { trace(3, "transform") << "-- move literal strings to data segment" << end(); if (p.segments.empty()) return; vector new_lines; for (int s = 0; s < SIZE(p.segments); ++s) { segment& seg = p.segments.at(s); trace(99, "transform") << "segment '" << seg.name << "'" << end(); for (int i = 0; i < SIZE(seg.lines); ++i) { //? cerr << seg.name << '/' << i << '\n'; line& line = seg.lines.at(i); for (int j = 0; j < SIZE(line.words); ++j) { word& curr = line.words.at(j); if (curr.data.at(0) != '"') continue; ostringstream global_name; global_name << "__subx_global_" << Next_auto_global; ++Next_auto_global; add_global_to_data_segment(global_name.str(), curr, new_lines); curr.data = global_name.str(); } trace(99, "transform") << "line after transform: '" << data_to_string(line) << "'" << end(); } } segment* data = find(p, "data"); if (data) data->lines.insert(data->lines.end(), new_lines.begin(), new_lines.end()); } void add_global_to_data_segment(const string& name, const word& value, vector& out) { trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end(); // emit label out.push_back(label(name)); // emit size for size-prefixed array out.push_back(line()); emit_hex_bytes(out.back(), SIZE(value.data)-/*skip quotes*/2, 4/*bytes*/); // emit data byte by byte out.push_back(line()); line& curr = out.back(); for (int i = /*skip start quote*/1; i < SIZE(value.data)-/*skip end quote*/1; ++i) { char c = value.data.at(i); curr.words.push_back(word()); curr.words.back().data = hex_byte_to_string(c); curr.words.back().metadata.push_back(string(1, c)); } } //: Within strings, whitespace is significant. So we need to redo our instruction //: parsing. void test_instruction_with_string_literal() { parse_instruction_character_by_character( "a \"abc def\" z\n" // two spaces inside string ); CHECK_TRACE_CONTENTS( "parse2: word: a\n" "parse2: word: \"abc def\"\n" "parse2: word: z\n" ); // no other words CHECK_TRACE_COUNT("parse2", 3); } void test_string_literal_in_data_segment() { run( "== code 0x1\n" "b8/copy X/imm32\n" "== data 0x2000\n" "X:\n" "\"test\"/imm32\n" ); CHECK_TRACE_CONTENTS( "transform: -- move literal strings to data segment\n" "transform: adding global variable '__subx_global_1' containing \"test\"\n" "transform: line after transform: '__subx_global_1'\n" ); } void test_string_literal_with_missing_quote() { Hide_errors = true; run( "== code 0x1\n" "b8/copy \"test/imm32\n" "== data 0x2000\n" ); CHECK_TRACE_CONTENTS( "error: unclosed string in: b8/copy \"test/imm32" ); } :(before "End Line Parsing Special-cases(line_data -> l)") if (line_data.find('"') != string::npos) { // can cause false-positives, but we can handle them parse_instruction_character_by_character(line_data, l); continue; } :(code) void parse_instruction_character_by_character(const string& line_data, vector& out) { if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) { raise << "parse_instruction_character_by_character: should receive only a single line\n" << end(); return; } // parse literals istringstream in(line_data); in >> std::noskipws; line result; result.original = line_data; // add tokens (words or strings) one by one while (has_data(in)) { skip_whitespace(in); if (!has_data(in)) break; char c = in.get(); if (c == '#') break; // comment; drop rest of line if (c == ':') break; // line metadata; skip for now if (c == '.') { if (!has_data(in)) break; // comment token at end of line if (isspace(in.peek())) continue; // '.' followed by space is comment token; skip } result.words.push_back(word()); if (c == '"') { // stri