From a49bc41365bf6b4f0c006c5fbdcb4b519634c42c Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sat, 1 Sep 2018 15:58:53 -0700 Subject: 4531 - automatically compute segment addresses --- subx/011run.cc | 43 +++++++++++++++++++++++++++++++++++++++++-- subx/028translate.cc | 40 ++++++++++++++++++++++++++++++++-------- subx/030---operands.cc | 31 ------------------------------- subx/035labels.cc | 10 ---------- subx/apps/factorial.subx | 2 +- subx/examples/ex1.1.subx | 2 +- subx/examples/ex1.2.subx | 2 +- subx/examples/ex2.subx | 2 +- subx/examples/ex3.subx | 2 +- subx/examples/ex4.subx | 2 +- subx/examples/ex5.subx | 2 +- subx/examples/ex6.subx | 2 +- subx/examples/ex7.subx | 2 +- subx/examples/ex8.subx | 2 +- 14 files changed, 83 insertions(+), 61 deletions(-) diff --git a/subx/011run.cc b/subx/011run.cc index 9c024e27..bca04289 100644 --- a/subx/011run.cc +++ b/subx/011run.cc @@ -104,7 +104,11 @@ struct program { struct segment { uint32_t start; vector<line> lines; - segment() :start(0) {} + // End segment Fields + segment() { + start = 0; + // End segment Constructor + } }; :(before "struct segment") struct line { @@ -145,7 +149,10 @@ void parse(istream& fin, program& out) { out.segments.back().lines.swap(l); } segment s; - lin >> std::hex >> s.start; + string segment_title; + lin >> segment_title; + if (starts_with(segment_title, "0x")) + s.start = parse_int(segment_title); trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); out.segments.push_back(s); // todo? 
@@ -296,3 +303,35 @@ int32_t imm32() { result |= (next()<<24); return result; } + +:(code) +int32_t parse_int(const string& s) { + if (s.empty()) return 0; + istringstream in(s); + in >> std::hex; + if (s.at(0) == '-') { + int32_t result = 0; + in >> result; + if (!in || !in.eof()) { + raise << "not a number: " << s << '\n' << end(); + return 0; + } + return result; + } + uint32_t uresult = 0; + in >> uresult; + if (!in || !in.eof()) { + raise << "not a number: " << s << '\n' << end(); + return 0; + } + return static_cast<int32_t>(uresult); +} +:(before "End Unit Tests") +void test_parse_int() { + CHECK_EQ(0, parse_int("0")); + CHECK_EQ(0, parse_int("0x0")); + CHECK_EQ(0, parse_int("0x0")); + CHECK_EQ(16, parse_int("10")); // hex always + CHECK_EQ(-1, parse_int("-1")); + CHECK_EQ(-1, parse_int("0xffffffff")); +} diff --git a/subx/028translate.cc b/subx/028translate.cc index f3e30126..cc41e715 100644 --- a/subx/028translate.cc +++ b/subx/028translate.cc @@ -30,12 +30,28 @@ if (is_equal(argv[1], "translate")) { if (trace_contains_errors()) return 1; transform(p); if (trace_contains_errors()) return 1; + compute_segment_offsets(p); save_elf(p, argv[3]); if (trace_contains_errors()) unlink(argv[3]); return 0; } +:(before "End segment Fields") +uint32_t offset; +:(before "End segment Constructor") +offset = 0; :(code) +void compute_segment_offsets(program& p) { + uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/; + uint32_t cumulative_segment_size = 0; + for (size_t i = 0; i < p.segments.size(); ++i) { + segment& curr = p.segments.at(i); + curr.offset = p_offset + cumulative_segment_size; +//? 
cerr << "offset " << i << ": " << curr.offset << '\n'; + cumulative_segment_size += num_words(curr); + } +} + // write out a program to a bare-bones ELF file void save_elf(const program& p, const char* filename) { ofstream out(filename, ios::binary); @@ -45,6 +61,12 @@ void save_elf(const program& p, const char* filename) { out.close(); } +uint32_t start(const program& p, const int segment_index) { + const segment& seg = p.segments.at(segment_index); + if (seg.start != 0) return seg.start; // if start is already initialized, use it + return CODE_START + SEGMENT_SIZE*segment_index + seg.offset; +} + void write_elf_header(ostream& out, const program& p) { char c = '\0'; #define O(X) c = (X); out.write(&c, sizeof(c)) @@ -64,7 +86,7 @@ void write_elf_header(ostream& out, const program& p) { // e_version O(0x01); O(0x00); O(0x00); O(0x00); // e_entry - int e_entry = p.segments.at(0).start; // convention + int e_entry = start(p, /*segment*/0); // convention emit(e_entry); // e_phoff -- immediately after ELF header int e_phoff = 0x34; @@ -91,20 +113,22 @@ void write_elf_header(ostream& out, const program& p) { // e_shstrndx emit(dummy16); - uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/; for (int i = 0; i < SIZE(p.segments); ++i) { + const segment& curr = p.segments.at(i); //// phdr // p_type uint32_t p_type = 0x1; emit(p_type); // p_offset + uint32_t p_offset = curr.offset; emit(p_offset); // p_vaddr - emit(p.segments.at(i).start); + uint32_t p_start = start(p, i); + emit(p_start); // p_paddr - emit(p.segments.at(i).start); + emit(p_start); // p_filesz - uint32_t size = size_of(p.segments.at(i)); + uint32_t size = num_words(curr); assert(size < SEGMENT_SIZE); emit(size); // p_memsz @@ -126,8 +150,8 @@ void write_elf_header(ostream& out, const program& p) { // congruent, modulo the page size." 
-- http://refspecs.linuxbase.org/elf/elf.pdf (page 95) uint32_t p_align = 0x1000; // default page size on linux emit(p_align); - if (p_offset % p_align != p.segments.at(i).start % p_align) { - raise << "segment starting at 0x" << HEXWORD << p.segments.at(i).start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p.segments.at(i).start % p_align) << '\n' << end(); + if (p_offset % p_align != p_start % p_align) { + raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end(); return; } @@ -148,7 +172,7 @@ void write_segment(const segment& s, ostream& out) { } } -uint32_t size_of(const segment& s) { +uint32_t num_words(const segment& s) { uint32_t sum = 0; for (int i = 0; i < SIZE(s.lines); ++i) sum += SIZE(s.lines.at(i).words); diff --git a/subx/030---operands.cc b/subx/030---operands.cc index 3e103b66..6fa2354d 100644 --- a/subx/030---operands.cc +++ b/subx/030---operands.cc @@ -430,37 +430,6 @@ bool is_hex_int(const string& s) { return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos; } -int32_t parse_int(const string& s) { - if (s.empty()) return 0; - istringstream in(s); - in >> std::hex; - if (s.at(0) == '-') { - int32_t result = 0; - in >> result; - if (!in || !in.eof()) { - raise << "not a number: " << s << '\n' << end(); - return 0; - } - return result; - } - uint32_t uresult = 0; - in >> uresult; - if (!in || !in.eof()) { - raise << "not a number: " << s << '\n' << end(); - return 0; - } - return static_cast<int32_t>(uresult); -} -:(before "End Unit Tests") -void test_parse_int() { - CHECK_EQ(0, parse_int("0")); - CHECK_EQ(0, parse_int("0x0")); - CHECK_EQ(0, parse_int("0x0")); - CHECK_EQ(16, parse_int("10")); // hex always - CHECK_EQ(-1, parse_int("-1")); - CHECK_EQ(-1, parse_int("0xffffffff")); -} - :(code) string 
to_string(const line& inst) { ostringstream out; diff --git a/subx/035labels.cc b/subx/035labels.cc index 0d366d2e..659e4391 100644 --- a/subx/035labels.cc +++ b/subx/035labels.cc @@ -18,16 +18,6 @@ //: be a single character long. 'a' is not a hex number, it's a variable. //: Later layers may add more conventions partitioning the space of names. But //: the above rules will remain inviolate. -bool is_number(const string& s) { - if (s.at(0) == '-') return true; - if (isdigit(s.at(0))) return true; - return SIZE(s) == 2; -} -:(before "End Unit Tests") -void test_is_number() { - CHECK(!is_number("a")); -} -:(code) void check_valid_name(const string& s) { if (s.empty()) { raise << "empty name!\n" << end(); diff --git a/subx/apps/factorial.subx b/subx/apps/factorial.subx index de9953bf..c531041b 100644 --- a/subx/apps/factorial.subx +++ b/subx/apps/factorial.subx @@ -7,7 +7,7 @@ # $ echo $? # 120 -== 0x08048054 # code segment, after leaving room for ELF header +== code # instruction effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/examples/ex1.1.subx b/subx/examples/ex1.1.subx index 1cbe5dc1..56b54a10 100644 --- a/subx/examples/ex1.1.subx +++ b/subx/examples/ex1.1.subx @@ -8,7 +8,7 @@ # $ echo $? # 42 -== 0x08048054 # code segment, after leaving room for ELF header +== code # opcode ModR/M SIB displacement immediate # instruction mod, reg, Reg/Mem bits scale, index, base # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/examples/ex1.2.subx b/subx/examples/ex1.2.subx index 2652037a..7dca4ec3 100644 --- a/subx/examples/ex1.2.subx +++ b/subx/examples/ex1.2.subx @@ -8,7 +8,7 @@ # $ echo $? 
# 42 -== 0x08048054 # code segment, after leaving room for ELF header +== code # instruction effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/examples/ex2.subx b/subx/examples/ex2.subx index 6463132b..0aad9232 100644 --- a/subx/examples/ex2.subx +++ b/subx/examples/ex2.subx @@ -7,7 +7,7 @@ # $ echo $? # 2 -== 0x08048054 # code segment, after leaving room for ELF header +== code # instruction effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/examples/ex3.subx b/subx/examples/ex3.subx index a4012f92..eb4d6c01 100644 --- a/subx/examples/ex3.subx +++ b/subx/examples/ex3.subx @@ -7,7 +7,7 @@ # $ echo $? # 55 -== 0x08048054 # code segment, after leaving room for ELF header +== code # instruction effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/examples/ex4.subx b/subx/examples/ex4.subx index cd7003d3..2f5b0e73 100644 --- a/subx/examples/ex4.subx +++ b/subx/examples/ex4.subx @@ -4,7 +4,7 @@ # $ subx translate ex4.subx ex4 # $ subx run ex4 -== 0x08048074 # code segment, after leaving room for ELF header and segment headers +== code # instruction effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/examples/ex5.subx b/subx/examples/ex5.subx index e4a2db81..400e17c3 100644 --- a/subx/examples/ex5.subx +++ b/subx/examples/ex5.subx @@ -4,7 +4,7 @@ # $ subx translate ex5.subx ex5 # $ subx run ex5 -== 0x08048054 # code segment, after leaving room for ELF header and segment headers +== code # instruction 
effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/examples/ex6.subx b/subx/examples/ex6.subx index 4c75c617..3d05f00a 100644 --- a/subx/examples/ex6.subx +++ b/subx/examples/ex6.subx @@ -5,7 +5,7 @@ # $ subx run ex6 # Hello, world! -== 0x08048074 # code segment, after leaving room for ELF header and segment headers +== code # instruction effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/examples/ex7.subx b/subx/examples/ex7.subx index 6f8d3979..17a38e29 100644 --- a/subx/examples/ex7.subx +++ b/subx/examples/ex7.subx @@ -10,7 +10,7 @@ # $ echo $? # 97 -== 0x08048074 # code segment, after leaving room for ELF header and segment headers +== code # instruction effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/examples/ex8.subx b/subx/examples/ex8.subx index 9ea547f1..452b917d 100644 --- a/subx/examples/ex8.subx +++ b/subx/examples/ex8.subx @@ -14,7 +14,7 @@ # ... # Locals start from ESP-4 downwards. -== 0x08048054 # code segment, after leaving room for ELF header and segment headers +== code # instruction effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -- cgit 1.4.1-2-gfad0