<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>Mu - 002test.cc</title>
<meta name="Generator" contentpre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
//: Allow instructions to mention literals directly.
//:
//: This layer will transparently move them to the global segment (assumed to
//: always be the second segment).
// A string literal used as an operand in the code segment is replaced by a
// generated global name ('__subx_global_1'), and the trace records the new
// global together with the string it contains.
void test_transform_literal_string() {
run(
"== code 0x1\n"
"b8/copy \"test\"/imm32\n"
"== data 0x2000\n" // need an empty segment
);
// the transformed line mentions the generated name instead of the literal
CHECK_TRACE_CONTENTS(
"transform: -- move literal strings to data segment\n"
"transform: adding global variable '__subx_global_1' containing \"test\"\n"
"transform: line after transform: 'b8 __subx_global_1'\n"
);
}
//: We don't rely on any transforms running in previous layers, but this layer
//: knows about labels and global variables and will emit them for previous
//: layers to transform.
//: Append transform_literal_strings to the Transform list at the
//: "Begin Transforms" marker.
:(after "Begin Transforms")
Transform.push_back(transform_literal_strings);
//: Counter used to build the names of generated globals
//: ("__subx_global_" followed by this number); the first one gets 1.
:(before "End Globals")
int Next_auto_global = 1;
//: Put the counter back to 1 at the "End Reset" marker
//: (presumably so every run/test numbers its globals from 1 -- confirm).
:(before "End Reset")
Next_auto_global = 1;
:(code)
// Replace every string-literal word (data starting with a double quote) in
// every segment of 'p' with the name of a freshly generated global, and
// collect the lines defining those globals.
//
// The collected lines are appended to the segment found under the name
// "data". If no such segment exists they are dropped, even though the words
// have already been renamed.
//
// Side effect: increments Next_auto_global once per literal replaced.
void transform_literal_strings(program& p) {
  trace(3, "transform") << "-- move literal strings to data segment" << end();
  if (p.segments.empty()) return;
  vector<line> new_lines;
  for (int s = 0;  s < SIZE(p.segments);  ++s) {
    segment& seg = p.segments.at(s);
    trace(99, "transform") << "segment '" << seg.name << "'" << end();
    for (int i = 0;  i < SIZE(seg.lines);  ++i) {
//?       cerr << seg.name << '/' << i << '\n';
      line& inst = seg.lines.at(i);
      for (int j = 0;  j < SIZE(inst.words);  ++j) {
        word& curr = inst.words.at(j);
        // Skip words with empty data instead of letting at(0) throw
        // std::out_of_range (presumably possible for a word consisting only
        // of metadata -- confirm against parse_word).
        if (curr.data.empty()) continue;
        if (curr.data.at(0) != '"') continue;
        ostringstream global_name;
        global_name << "__subx_global_" << Next_auto_global;
        ++Next_auto_global;
        // define the global first (it reads the literal out of curr), then
        // overwrite the literal with the global's name
        add_global_to_data_segment(global_name.str(), curr, new_lines);
        curr.data = global_name.str();
      }
      trace(99, "transform") << "line after transform: '" << data_to_string(inst) << "'" << end();
    }
  }
  segment* data = find(p, "data");
  if (data)
    data->lines.insert(data->lines.end(), new_lines.begin(), new_lines.end());
}
void add_global_to_data_segment(const string& name, const word& value, vector<line>& out) {
trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
// emit label
out.push_back(label(name));
// emit size for size-prefixed array
out.push_back(line());
emit_hex_bytes(out.back(), SIZE(value.data)-/*skip quotes*/2, 4/*bytes*/);
// emit data byte by byte
out.push_back(line());
line& curr = out.back();
for (int i = /*skip start quote*/1; i < SIZE(value.data)-/*skip end quote*/1; ++i) {
char c = value.data.at(i);
curr.words.push_back(word());
curr.words.back().data = hex_byte_to_string(c);
curr.words.back().metadata.push_back(string(1, c));
}
}
//: Within strings, whitespace is significant. So we need to redo our instruction
//: parsing.
// Whitespace inside a string literal is significant: the two consecutive
// spaces between 'abc' and 'def' must be preserved, and the whole quoted
// string must come out as a single word.
void test_instruction_with_string_literal() {
  parse_instruction_character_by_character(
      "a \"abc  def\" z\n"  // two spaces inside string
  );
  CHECK_TRACE_CONTENTS(
      "parse2: word: a\n"
      "parse2: word: \"abc  def\"\n"
      "parse2: word: z\n"
  );
  // no other words
  CHECK_TRACE_COUNT("parse2", 3);
}
// A string literal appearing in the data segment itself (under label X) is
// also replaced by a generated global; after the transform the line consists
// of just the generated name.
void test_string_literal_in_data_segment() {
run(
"== code 0x1\n"
"b8/copy X/imm32\n"
"== data 0x2000\n"
"X:\n"
"\"test\"/imm32\n"
);
CHECK_TRACE_CONTENTS(
"transform: -- move literal strings to data segment\n"
"transform: adding global variable '__subx_global_1' containing \"test\"\n"
"transform: line after transform: '__subx_global_1'\n"
);
}
// A string literal without a closing quote produces an "unclosed string"
// error that echoes the offending line.
void test_string_literal_with_missing_quote() {
// an error is the expected outcome here
// (presumably this flag keeps it from being printed -- confirm)
Hide_errors = true;
run(
"== code 0x1\n"
"b8/copy \"test/imm32\n"
"== data 0x2000\n"
);
CHECK_TRACE_CONTENTS(
"error: unclosed string in: b8/copy \"test/imm32"
);
}
//: Any line containing a double quote is handed to the character-by-character
//: parser, which appends its result to 'l'; 'continue' then skips the rest of
//: the enclosing loop body for this line.
:(before "End Line Parsing Special-cases(line_data -> l)")
if (line_data.find('"') != string::npos) { // can cause false-positives, but we can handle them
parse_instruction_character_by_character(line_data, l);
continue;
}
:(code)
void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
if (line_data.find('\n') != string::npos && line_data.find('\n') != line_data.size()-1) {
raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
return;
}
// parse literals
istringstream in(line_data);
in >> std::noskipws;
line result;
result.original = line_data;
// add tokens (words or strings) one by one
while (has_data(in)) {
skip_whitespace(in);
if (!has_data(in)) break;
char c = in.get();
if (c == '#') break; // comment; drop rest of line
if (c == ':') break; // line metadata; skip for now
if (c == '.') {
if (!has_data(in)) break; // comment token at end of line
if (isspace(in.peek()))
continue; // '.' followed by space is comment token; skip
}
result.words.push_back(word());
if (c == '"') {
// string literal; slurp everything between quotes into data
ostringstream d;
d << c;
while (true) {
if (!has_data(in)) {
raise << "unclosed string in: " << line_data << end();
return;
}
in >> c;
if (c == '\\') {
in >> c;
if (c == 'n') d << '\n';
else if (c == '"') d << '"';
else if (c == '\\') d << '\\';
else {
raise << "parse_instruction_character_by_character: unknown escape sequence '\\" << c << "'\n" << end();
return;
}
continue;
} else {
d << c;
}
if (c == '"') break;
}
result.words.back().data = d.str();
result.words.back().original = d.str();
// slurp metadata
ostringstream m;
while (!isspace(in.peek()) && has_data(in)) { // peek can sometimes trigger eof(), so do it first
in >> c;
if (c == '/') {
if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
m.str("");
}
else {
m << c;
}
}
if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
}
else {
// not a string literal; slurp all characters until whitespace
ostringstream w;
w << c;
while (!isspace(in.peek()) && has_data(in)) { // peek can sometimes trigger eof(), so do it first
in >> c;
w << c;
}
parse_word(w.str(), result.words.back());
}
trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
}
if (!result.words.empty())
out.push_back(result);
}
// Consume characters from 'in' for as long as there is data and the next
// character is whitespace.
void skip_whitespace(istream& in) {
  for (;;) {
    if (!has_data(in)) return;
    if (!isspace(in.peek())) return;
    in.get();
  }
}
// If the next character of 'in' is '#', consume it and everything after it up
// to (but not including) the next newline. Otherwise leave 'in' untouched.
void skip_comment(istream& in) {
  if (!has_data(in)) return;
  if (in.peek() != '#') return;
  in.get();
  for (;;) {
    if (!has_data(in)) break;
    if (in.peek() == '\n') break;
    in.get();
  }
}
// Build a line consisting of the single word "<s>:".
line label(string s) {
  word w;
  w.data = s + ":";
  line result;
  result.words.push_back(w);
  return result;
}
// Convenience overload for tests: parse 'line_data' and throw away the parsed
// line, leaving only the trace for the caller to check.
void parse_instruction_character_by_character(const string& line_data) {
  vector<line> discarded;
  parse_instruction_character_by_character(line_data, discarded);
}
// A lone '.' between two words is a comment token: it is skipped and the
// words on either side are still parsed.
void test_parse2_comment_token_in_middle() {
parse_instruction_character_by_character(
"a . z\n"
);
CHECK_TRACE_CONTENTS(
"parse2: word: a\n"
"parse2: word: z\n"
);
CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
// no other words
CHECK_TRACE_COUNT("parse2", 2);
}
// A '.' immediately followed by a non-space character starts an ordinary
// word ('.b'), not a comment token.
void test_parse2_word_starting_with_dot() {
parse_instruction_character_by_character(
"a .b c\n"
);
CHECK_TRACE_CONTENTS(
"parse2: word: a\n"
"parse2: word: .b\n"
"parse2: word: c\n"
);
}
// A lone '.' at the start of the line is skipped; the words after it are
// still parsed.
void test_parse2_comment_token_at_start() {
parse_instruction_character_by_character(
". a b\n"
);
CHECK_TRACE_CONTENTS(
"parse2: word: a\n"
"parse2: word: b\n"
);
CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
}
// A lone '.' as the last token of the line (followed only by the newline) is
// skipped as well.
void test_parse2_comment_token_at_end() {
parse_instruction_character_by_character(
"a b .\n"
);
CHECK_TRACE_CONTENTS(
"parse2: word: a\n"
"parse2: word: b\n"
);
CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
}
// A word beginning with '.' is kept as a word even when it is the first token
// on the line.
void test_parse2_word_starting_with_dot_at_start() {
parse_instruction_character_by_character(
".a b c\n"
);
CHECK_TRACE_CONTENTS(
"parse2: word: .a\n"
"parse2: word: b\n"
"parse2: word: c\n"
);
}
// Metadata attached to a plain word with '/' shows up in the trace after the
// word ('b /c').
void test_parse2_metadata() {
parse_instruction_character_by_character(
".a b/c d\n"
);
CHECK_TRACE_CONTENTS(
"parse2: word: .a\n"
"parse2: word: b /c\n"
"parse2: word: d\n"
);
}
// Metadata directly after a string's closing quote is attached to the string
// word, and parsing continues with the following word.
void test_parse2_string_with_metadata() {
parse_instruction_character_by_character(
"a \"bc def\"/disp32 g\n"
);
CHECK_TRACE_CONTENTS(
"parse2: word: a\n"
"parse2: word: \"bc def\" /disp32\n"
"parse2: word: g\n"
);
}
// Same as above, but the string with metadata is the last token before the
// newline.
void test_parse2_string_with_metadata_at_end() {
parse_instruction_character_by_character(
"a \"bc def\"/disp32\n"
);
CHECK_TRACE_CONTENTS(
"parse2: word: a\n"
"parse2: word: \"bc def\" /disp32\n"
);
}
// Metadata after a string is still captured when the input ends right after
// it, with no terminating newline.
void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
parse_instruction_character_by_character(
"68/push \"test\"/f" // no newline, which is how calls from parse() will look
);
CHECK_TRACE_CONTENTS(
"parse2: word: 68 /push\n"
"parse2: word: \"test\" /f\n"
);
}
//: Make sure slashes inside strings don't trigger adding stuff from inside the
//: string to metadata.
// A '/' between the quotes stays part of the string's data; only the '/'
// after the closing quote introduces metadata.
void test_parse2_string_containing_slashes() {
parse_instruction_character_by_character(
"a \"bc/def\"/disp32\n"
);
CHECK_TRACE_CONTENTS(
"parse2: word: \"bc/def\" /disp32\n"
);
}
// A backslash-escaped quote inside a string does not end the string; it is
// stored as a bare quote character in the word's data.
void test_instruction_with_string_literal_with_escaped_quote() {
parse_instruction_character_by_character(
"\"a\\\"b\"\n" // escaped quote inside string
);
CHECK_TRACE_CONTENTS(
"parse2: word: \"a\"b\"\n"
);
// no other words
CHECK_TRACE_COUNT("parse2", 1);
}
// An escaped backslash inside a string is stored as a single backslash in the
// word's data.
void test_instruction_with_string_literal_with_escaped_backslash() {
parse_instruction_character_by_character(
"\"a\\\\b\"\n" // escaped backslash inside string
);
CHECK_TRACE_CONTENTS(
"parse2: word: \"a\\b\"\n"
);
// no other words
CHECK_TRACE_COUNT("parse2", 1);
}