## Lines in source files
Initial -whitespace/comments/tests
apps/factorial.subx 120 44
apps/crenshaw2-1.subx 561 180
apps/crenshaw2-1b.subx 757 186
apps/hex.subx 1442 149
apps/survey_elf.subx 4733 905
apps/pack.subx 5881 840
apps/dquotes.subx 1925 383
apps/assort.subx 905 183
apps/tests.subx 284 137
apps/sigils.subx 4641 896
apps/calls.subx 1785 448
apps/braces.subx 360 121
apps/mu.subx 36692 12858
## Total source lines needed including libraries
Initial -whitespace/comments/tests/dead code
apps/factorial.subx 8436 1700
apps/crenshaw2-1.subx 8644 1925
apps/crenshaw2-1b.subx 8736 1931
apps/hex.subx 9065 1908
apps/survey_elf.subx 10217 3248
apps/pack.subx 10589 2727
apps/dquotes.subx 9262 2468
apps/assort.subx 8686 2425
apps/tests.subx 8519 2214
apps/sigils.subx 10578 3043
apps/calls.subx 9242 2388
apps/braces.subx 8545 2111
apps/mu.subx 35438 15820
## executable size in KB
Initial -tests/dead code
apps/crenshaw2-1 41 4.3
apps/crenshaw2-1b 42 5.2
apps/factorial 42 5.2
apps/hex 45 5.0
apps/survey_elf 51 9.6
apps/pack 54 7.6
apps/dquotes 46 6.5
apps/assort 42 6.4
apps/tests 41 5.8
apps/sigils 54 9.1
apps/calls 47 7.1
apps/braces 42 5.9
apps/mu 563 131.0
## history of apps/mu.subx
date commit mu.subx -tests/cmts binary (KB excl. dead code)
parsing function headers 2019 Oct 30 5725 621 277 6.9
function calls Nov 10 5739 1202 346 7.2
code-generating primitive instructions Nov 17 5750 1923 363 7.3
arguments Nov 30 5785 4038 1330 13
return values 2020 Jan 1 5878 5432 1457 15
compound types: `addr` and `array`
//: Global variables.
//:
//: Global variables are just labels in the data segment.
//: However, they can only be used in imm32 and not disp32 arguments. And they
//: can't be used with jump and call instructions.
//:
//: This layer has much the same structure as rewriting labels.
:(code)
// A label defined in the data segment is treated as a global variable.
// Here 'x' is the first thing in a data segment starting at 0x2000, so the
// transform is expected to report that address for it.
void test_global_variable() {
run(
"== code 0x1\n"
"b9 x/imm32\n"
"== data 0x2000\n"
"x:\n"
" 00 00 00 00\n"
);
CHECK_TRACE_CONTENTS(
"transform: global variable 'x' is at address 0x00002000\n"
);
}
//: Register this layer's transform at the end of the list of transforms.
:(before "End Transforms")
Transform.push_back(rewrite_global_variables);
:(code)
// Transform entry point for global variables. Works in three steps:
//   1. compute an address for every label defined outside the code segment
//   2. remove those label definitions from the program
//   3. replace each use of a global variable with its 4-byte address
// If step 1 leaves errors in the trace, the program is left unmodified.
void rewrite_global_variables(program& p) {
trace(3, "transform") << "-- rewrite global variables" << end();
// Begin rewrite_global_variables
map<string, uint32_t> address;
compute_addresses_for_global_variables(p, address);
// don't modify the program if computing addresses reported any errors
if (trace_contains_errors()) return;
drop_global_variables(p);
replace_global_variables_with_addresses(p, address);
}
void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
for (int i = 0; i < SIZE(p.segments); ++i) {
if (p.segments.at(i).name != "code")
compute_addresses_for_global_variables(p.segments.at(i), address);
}
}
void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
int current_address = s.start;
for (int i = 0; i < SIZE(s.lines); ++i) {
const line& inst = s.lines.at(i);
for (int j = 0; j < SIZE(inst.words); ++j) {
const word& curr = inst.words.at(j);
if (*curr.data.rbegin() != ':') {
current_address += size_of(curr);
}
else {
string variable = drop_last(curr.data);
// ensure variables look sufficiently different from raw hex
check_valid_name(variable);
if (trace_contains_errors()) return;
if (j > 0)
raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
if (Labels_file.is_open())
Labels_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
if (contains_key(address, variable)) {
raise << "duplicate global '" << variable << "'\n" << end();
return;
}
put(address, variable, current_address);
trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
// no modifying current_address; global variable definitions won't be in the final binary
}
}
}
}
// Run drop_labels over every segment of 'p' that isn't named "code".
void drop_global_variables(program& p) {
  const int num_segments = SIZE(p.segments);
  for (int seg_index = 0; seg_index < num_segments; ++seg_index) {
    segment& seg = p.segments.at(seg_index);
    if (seg.name == "code") continue;
    drop_labels(seg);
  }
}
// Substitute addresses for global variable names throughout 'p'.
// The segment named "code" and all other segments are handled by separate
// helpers.
void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
  const int num_segments = SIZE(p.segments);
  if (num_segments == 0) return;
  for (int seg_index = 0; seg_index < num_segments; ++seg_index) {
    segment& seg = p.segments.at(seg_index);
    if (seg.name != "code") {
      replace_global_variables_in_data_segment(seg, address);
      continue;
    }
    replace_global_variables_in_code_segment(seg, address);
  }
}
// Rewrite each instruction in 'code', replacing words that name a known
// global variable with the 4 bytes of its address.
// Words that are neither a known global nor hex-like raise an error but are
// kept as they are. An invalid use of a global (see
// valid_use_of_global_variable) raises an error and aborts the rewrite,
// leaving the offending instruction and all later ones untouched.
void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
  for (int line_index = 0; line_index < SIZE(code.lines); ++line_index) {
    line& inst = code.lines.at(line_index);
    line rewritten;
    for (int word_index = 0; word_index < SIZE(inst.words); ++word_index) {
      const word& w = inst.words.at(word_index);
      if (contains_key(address, w.data)) {
        if (!valid_use_of_global_variable(w)) {
          raise << "'" << to_string(inst) << "': can't refer to global variable '" << w.data << "'\n" << end();
          return;
        }
        emit_hex_bytes(rewritten, get(address, w.data), 4);
        continue;
      }
      // not a known global: pass it through, complaining if it isn't a number either
      if (!looks_like_hex_int(w.data))
        raise << "missing reference to global '" << w.data << "'\n" << end();
      rewritten.words.push_back(w);
    }
    inst.words.swap(rewritten.words);
    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
  }
}
// Rewrite each line of a non-code segment:
//   - a word naming a known global becomes the 4 bytes of its address
//   - a hex-like word tagged /imm32, /imm16 or /imm8 is expanded to 4, 2 or
//     1 bytes respectively
//   - a hex-like word tagged /disp8, /disp16 or /disp32 raises an error and
//     is dropped from the line
//   - any other hex-like word is kept unchanged
//   - a word that's neither hex-like nor a known global raises an error and
//     is kept unchanged
void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
  for (int line_index = 0; line_index < SIZE(data.lines); ++line_index) {
    line& l = data.lines.at(line_index);
    line rewritten;
    for (int word_index = 0; word_index < SIZE(l.words); ++word_index) {
      const word& w = l.words.at(word_index);
      if (contains_key(address, w.data)) {
        trace(99, "transform") << w.data << " maps to " << HEXWORD << get(address, w.data) << end();
        emit_hex_bytes(rewritten, get(address, w.data), 4);
        continue;
      }
      if (!looks_like_hex_int(w.data)) {
        raise << "missing reference to global '" << w.data << "'\n" << end();
        rewritten.words.push_back(w);
        continue;
      }
      // a raw number; its metadata determines how many bytes it occupies
      if (has_argument_metadata(w, "imm32")) {
        emit_hex_bytes(rewritten, w, 4);
      }
      else if (has_argument_metadata(w, "imm16")) {
        emit_hex_bytes(rewritten, w, 2);
      }
      else if (has_argument_metadata(w, "imm8")) {
        emit_hex_bytes(rewritten, w, 1);
      }
      else if (has_argument_metadata(w, "disp8")) {
        raise << "can't use /disp8 in a non-code segment\n" << end();
      }
      else if (has_argument_metadata(w, "disp16")) {
        raise << "can't use /disp16 in a non-code segment\n" << end();
      }
      else if (has_argument_metadata(w, "disp32")) {
        raise << "can't use /disp32 in a non-code segment\n" << end();
      }
      else {
        rewritten.words.push_back(w);
      }
    }
    l.words.swap(rewritten.words);
    trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
  }
}
// Decide whether the word 'curr' is allowed to refer to a global variable.
// In this base version only /imm32 arguments qualify; the waypoint comment
// below is where other fragments add further permitted cases.
bool valid_use_of_global_variable(const word& curr) {
if (has_argument_metadata(curr, "imm32")) return true;
// End Valid Uses Of Global Variable(curr)
return false;
}
//:: a more complex sanity check for how we use global variables
//: requires first saving some data early before we pack arguments
//: Register at the start of the list of transforms, so it runs before later
//: transforms rewrite the instruction's arguments.
:(after "Begin Transforms")
Transform.push_back(correlate_disp32_with_mod);
:(code)
// Tag every /disp32 word in the code segment with extra metadata "has_mod"
// if its instruction also carries a /mod argument. The tag is consulted later
// when deciding whether a global variable may be used as a disp32.
//
// Does nothing if the program has no segments or no segment named "code".
void correlate_disp32_with_mod(program& p) {
  if (p.segments.empty()) return;
  // a program may consist solely of non-code segments; don't dereference a
  // failed lookup
  segment* code = find(p, "code");
  if (!code) return;
  for (int i = 0; i < SIZE(code->lines); ++i) {
    line& inst = code->lines.at(i);
    for (int j = 0; j < SIZE(inst.words); ++j) {
      word& curr = inst.words.at(j);
      // check the word first so the line-level query only runs for
      // instructions that actually contain a disp32
      if (has_argument_metadata(curr, "disp32")
          && has_argument_metadata(inst, "mod"))
        curr.metadata.push_back("has_mod");
    }
  }
}
:(before "End Valid Uses Of Global Variable(curr)")
// a /disp32 argument may refer to a global only if it was earlier tagged
// "has_mod", i.e. its instruction also had a /mod argument
if (has_argument_metadata(curr, "disp32"))
return has_metadata(curr, "has_mod");
// todo: more sophisticated check, to ensure we don't use global variable
// addresses as a real displacement added to other arguments.
:(code)
// Return true if any metadata entry of 'w' is exactly equal to 'm'.
bool has_metadata(const word& w, const string& m) {
  const int count = SIZE(w.metadata);
  int pos = 0;
  while (pos < count && w.metadata.at(pos) != m)
    ++pos;
  return pos < count;
}
// Using a global variable as the /disp8 target of a jump must be reported
// as an error.
void test_global_variable_disallowed_in_jump() {
Hide_errors = true;
run(
"== code 0x1\n"
"eb/jump x/disp8\n"
"== data 0x2000\n"
"x:\n"
" 00 00 00 00\n"
);
CHECK_TRACE_CONTENTS(
"error: 'eb/jump x/disp8': can't refer to global variable 'x'\n"
// sub-optimal error message; should be
//? "error: can't jump to data (variable 'x')\n"
);
}
// Using a global variable as the /disp32 target of a call (an instruction
// without a /mod argument) must be reported as an error.
void test_global_variable_disallowed_in_call() {
Hide_errors = true;
run(
"== code 0x1\n"
"e8/call x/disp32\n"
"== data 0x2000\n"
"x:\n"
" 00 00 00 00\n"
);
CHECK_TRACE_CONTENTS(
"error: 'e8/call x/disp32': can't refer to global variable 'x'\n"
// sub-optimal error message; should be
//? "error: can't call to the data segment ('x')\n"
);
}
// Global variables can be referenced from within the data segment itself.
// 'x' occupies the 4 bytes at 0x2000, so 'y' lands at 0x2004; those 4 bytes
// should hold that address, least-significant byte first.
void test_global_variable_in_data_segment() {
run(
"== code 0x1\n"
"b9 x/imm32\n"
"== data 0x2000\n"
"x:\n"
" y/imm32\n"
"y:\n"
" 00 00 00 00\n"
);
// check that we loaded 'x' with the address of 'y'
CHECK_TRACE_CONTENTS(
"load: 0x00002000 -> 04\n"
"load: 0x00002001 -> 20\n"
"load: 0x00002002 -> 00\n"
"load: 0x00002003 -> 00\n"
);
CHECK_TRACE_COUNT("error", 0);
}
// A raw number tagged /imm32 in the data segment is expanded to 4 bytes.
void test_raw_number_with_imm32_in_data_segment() {
run(
"== code 0x1\n"
"b9 x/imm32\n"
"== data 0x2000\n"
"x:\n"
" 1/imm32\n"
);
// check that we loaded 'x' with the value 1, padded out to 4 bytes
CHECK_TRACE_CONTENTS(
"load: 0x00002000 -> 01\n"
"load: 0x00002001 -> 00\n"
"load: 0x00002002 -> 00\n"
"load: 0x00002003 -> 00\n"
);
CHECK_TRACE_COUNT("error", 0);
}
// Defining the same global variable twice must be reported as an error.
void test_duplicate_global_variable() {
Hide_errors = true;
run(
"== code 0x1\n"
"40/increment-EAX\n"
"== data 0x2000\n"
"x:\n"
"x:\n"
" 00\n"
);
CHECK_TRACE_CONTENTS(
"error: duplicate global 'x'\n"
);
}
// A global variable is acceptable as a /disp32 argument when the instruction
// also has a /mod argument.
void test_global_variable_disp32_with_modrm() {
run(
"== code 0x1\n"
"8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32\n"
"== data 0x2000\n"
"x:\n"
" 00 00 00 00\n"
);
CHECK_TRACE_COUNT("error", 0);
}
// A call whose /disp32 target is a label in the code segment (not a global
// variable) must not raise any errors.
// NOTE(review): this uses transform() rather than run(), presumably to
// avoid executing the program -- confirm.
void test_global_variable_disp32_with_call() {
transform(
"== code 0x1\n"
"foo:\n"
" e8/call bar/disp32\n"
"bar:\n"
);
CHECK_TRACE_COUNT("error", 0);
}
// Render a line as text: words separated by single spaces, each word's data
// followed by its metadata entries, every entry prefixed with '/'.
string to_full_string(const line& in) {
  ostringstream out;
  const int num_words = SIZE(in.words);
  for (int word_index = 0; word_index < num_words; ++word_index) {
    const word& curr = in.words.at(word_index);
    if (word_index != 0) out << ' ';
    out << curr.data;
    const int num_metadata = SIZE(curr.metadata);
    for (int meta_index = 0; meta_index < num_metadata; ++meta_index) {
      out << '/' << curr.metadata.at(meta_index);
    }
  }
  return out.str();
}