diff options
author | Kartik Agaram <vc@akkartik.com> | 2018-09-28 23:08:27 -0700 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2018-09-29 10:20:13 -0700 |
commit | 630433cd9cb97cf71d24bfc8fab6fb54ce40382a (patch) | |
tree | cf4cffae8599489e5efcbc18b965f804d5e3a8e8 /subx | |
parent | fd0cf1cd07ce01c3d6fe709d55b60ff9d1d5d44f (diff) | |
download | mu-630433cd9cb97cf71d24bfc8fab6fb54ce40382a.tar.gz |
4614 - redo simulated RAM
Simulated 'Memory' is no longer a single flat array. Instead it knows about segments, each of which lives in a VMA (Virtual Memory Area). The code segment will always be first, and the data/heap segment will always be second. The brk() syscall knows about the data segment. One nice side-effect is that I no longer need to mess with Memory initialization regardless of where I place my segments.
Diffstat (limited to 'subx')
-rw-r--r-- | subx/010---vm.cc | 84 | ||||
-rw-r--r-- | subx/011run.cc | 2 | ||||
-rw-r--r-- | subx/012elf.cc | 39 | ||||
-rw-r--r-- | subx/013direct_addressing.cc | 3 | ||||
-rw-r--r-- | subx/020syscalls.cc | 15 | ||||
-rw-r--r-- | subx/034compute_segment_address.cc | 2 | ||||
-rw-r--r-- | subx/035labels.cc | 1 | ||||
-rw-r--r-- | subx/036global_variables.cc | 6 | ||||
-rw-r--r-- | subx/038---literal_strings.cc | 3 |
9 files changed, 101 insertions, 54 deletions
diff --git a/subx/010---vm.cc b/subx/010---vm.cc index 11ea60eb..c862ed99 100644 --- a/subx/010---vm.cc +++ b/subx/010---vm.cc @@ -89,26 +89,74 @@ SF = ZF = OF = false; //:: simulated RAM +:(before "End Types") +const uint32_t INITIAL_SEGMENT_SIZE = 0x1000 - 1; +// Subtract one just so we can start the first segment at address 1 without +// overflowing the first segment. Other segments will learn to adjust. + +// Like in real-world Linux, we'll allocate RAM for our programs in slabs +// called VMAs or Virtual Memory Areas. +struct vma { + uint32_t start; // inclusive + uint32_t end; // exclusive + vector<uint8_t> _data; + vma(uint32_t s, uint32_t e) :start(s), end(e) { + _data.resize(end-start); + } + vma(uint32_t s) :start(s), end(s+INITIAL_SEGMENT_SIZE) { + _data.resize(end-start); + } + bool match(uint32_t a) { + return a >= start && a < end; + } + bool match32(uint32_t a) { + return a >= start && a+4 <= end; + } + uint8_t& data(uint32_t a) { + assert(match(a)); + return _data.at(a-start); + } + void grow_until(uint32_t new_end_address) { + if (new_end_address < end) return; + end = new_end_address; + _data.resize(new_end_address - start); + } + // End vma Methods +}; + +:(before "End Globals") +// RAM is made of VMAs. +vector<vma> Mem; +:(code) +// The first 3 VMAs are special. When loading ELF binaries in later layers, +// we'll assume that the first VMA is for code, the second is for data +// (including the heap), and the third for the stack. +void grow_code_segment(uint32_t new_end_address) { + assert(!Mem.empty()); + Mem.at(0).grow_until(new_end_address); +} +void grow_data_segment(uint32_t new_end_address) { + assert(SIZE(Mem) > 1); + Mem.at(1).grow_until(new_end_address); +} :(before "End Globals") -vector<uint8_t> Mem; -uint32_t Mem_offset = 0; -uint32_t End_of_program = 0; +uint32_t End_of_program = 0; // when the program executes past this address in tests we'll stop the test +// The stack grows downward. Can't increase its size for now. 
:(before "End Reset") Mem.clear(); -Mem.resize(1024); -Mem_offset = 0; End_of_program = 0; :(code) // These helpers depend on Mem being laid out contiguously (so you can't use a // map, etc.) and on the host also being little-endian. inline uint8_t read_mem_u8(uint32_t addr) { - return Mem.at(addr-Mem_offset); + uint8_t* handle = mem_addr_u8(addr); // error messages get printed here + return handle ? *handle : 0; } inline int8_t read_mem_i8(uint32_t addr) { return static_cast<int8_t>(read_mem_u8(addr)); } inline uint32_t read_mem_u32(uint32_t addr) { - uint32_t* handle = mem_addr_u32(addr); + uint32_t* handle = mem_addr_u32(addr); // error messages get printed here return handle ? *handle : 0; } inline int32_t read_mem_i32(uint32_t addr) { @@ -116,16 +164,25 @@ inline int32_t read_mem_i32(uint32_t addr) { } inline uint8_t* mem_addr_u8(uint32_t addr) { - return &Mem.at(addr-Mem_offset); + for (int i = 0; i < SIZE(Mem); ++i) + if (Mem.at(i).match(addr)) + return &Mem.at(i).data(addr); + raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end(); + return NULL; } inline int8_t* mem_addr_i8(uint32_t addr) { return reinterpret_cast<int8_t*>(mem_addr_u8(addr)); } inline uint32_t* mem_addr_u32(uint32_t addr) { - return reinterpret_cast<uint32_t*>(mem_addr_u8(addr)); + for (int i = 0; i < SIZE(Mem); ++i) + if (Mem.at(i).match32(addr)) + return reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr)); + raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end(); + raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end(); + return NULL; } inline int32_t* mem_addr_i32(uint32_t addr) { - return reinterpret_cast<int32_t*>(mem_addr_u8(addr)); + return reinterpret_cast<int32_t*>(mem_addr_u32(addr)); } // helper for some syscalls. But read-only. 
inline const char* mem_addr_string(uint32_t addr) { @@ -149,6 +206,13 @@ inline void write_mem_i32(uint32_t addr, int32_t val) { if (handle != NULL) *handle = val; } +inline bool already_allocated(uint32_t addr) { + for (int i = 0; i < SIZE(Mem); ++i) + if (Mem.at(i).match(addr)) + return true; + return false; +} + //:: core interpreter loop :(code) diff --git a/subx/011run.cc b/subx/011run.cc index d3963e3e..22eaad9d 100644 --- a/subx/011run.cc +++ b/subx/011run.cc @@ -224,6 +224,8 @@ void load(const program& p) { for (int i = 0; i < SIZE(p.segments); ++i) { const segment& seg = p.segments.at(i); uint32_t addr = seg.start; + if (!already_allocated(addr)) + Mem.push_back(vma(seg.start)); trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); for (int j = 0; j < SIZE(seg.lines); ++j) { const line& l = seg.lines.at(j); diff --git a/subx/012elf.cc b/subx/012elf.cc index 787d914c..7bdbc548 100644 --- a/subx/012elf.cc +++ b/subx/012elf.cc @@ -9,8 +9,6 @@ if (is_equal(argv[1], "run")) { assert(argc > 2); reset(); cerr << std::hex; - initialize_mem(); - Mem_offset = CODE_START; load_elf(argv[2], argc, argv); while (EIP < End_of_program) // weak final-gasp termination check run_one_instruction(); @@ -60,9 +58,10 @@ void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[ // unused: e_shstrndx for (size_t i = 0; i < e_phnum; ++i) - load_segment_from_program_header(elf_contents, size, e_phoff + i*e_phentsize, e_ehsize); + load_segment_from_program_header(elf_contents, i, size, e_phoff + i*e_phentsize, e_ehsize); // initialize code and stack + Mem.push_back(vma(STACK_SEGMENT)); Reg[ESP].u = AFTER_STACK; Reg[EBP].u = 0; EIP = e_entry; @@ -70,6 +69,7 @@ void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[ // initialize args on stack // no envp for now // we wastefully use a separate page of memory for argv + Mem.push_back(vma(ARGV_DATA_SEGMENT)); uint32_t argv_data = ARGV_DATA_SEGMENT; for (int i 
= argc-1; i >= /*skip 'subx_bin' and 'run'*/2; --i) { push(argv_data); @@ -89,7 +89,7 @@ void push(uint32_t val) { write_mem_u32(Reg[ESP].u, val); } -void load_segment_from_program_header(uint8_t* elf_contents, size_t size, uint32_t offset, uint32_t e_ehsize) { +void load_segment_from_program_header(uint8_t* elf_contents, int segment_index, size_t size, uint32_t offset, uint32_t e_ehsize) { uint32_t p_type = u32_in(&elf_contents[offset]); trace(90, "load") << "program header at offset " << offset << ": type " << p_type << end(); if (p_type != 1) { @@ -103,35 +103,36 @@ void load_segment_from_program_header(uint8_t* elf_contents, size_t size, uint32 uint32_t p_filesz = u32_in(&elf_contents[offset + 16]); uint32_t p_memsz = u32_in(&elf_contents[offset + 20]); if (p_filesz != p_memsz) - raise << "Can't handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die(); + raise << "Can't yet handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die(); if (p_offset + p_filesz > size) raise << "Invalid binary; segment at offset " << offset << " is too large: wants to end at " << p_offset+p_filesz << " but the file ends at " << size << '\n' << die(); - if (Mem.size() < p_vaddr + p_memsz) - Mem.resize(p_vaddr + p_memsz); - if (size > p_memsz) size = p_memsz; + if (p_memsz > INITIAL_SEGMENT_SIZE) { + raise << "Code segment too small for SubX; for now please manually increase INITIAL_SEGMENT_SIZE.\n" << end(); + return; + } trace(90, "load") << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ')' << end(); + if (size > p_memsz) size = p_memsz; + Mem.push_back(vma(p_vaddr)); for (size_t i = 0; i < p_filesz; ++i) write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]); - if (End_of_program < p_vaddr+p_memsz) + if (segment_index == 0 && End_of_program < p_vaddr+p_memsz) End_of_program = p_vaddr+p_memsz; } :(before "End 
Includes") // Very primitive/fixed/insecure ELF segments for now. // code: 0x08048000 -> 0x08048fff -// data: 0x08049000 -> 0x08049fff -// heap: 0x0804a000 -> 0x0804afff -// stack: 0x0804bfff -> 0x0804b000 (downward) -const int CODE_START = 0x08048000; +// data/heap: 0x08050000 -> 0x08050fff +// stack: 0x08060fff -> 0x08060000 (downward) const int SEGMENT_SIZE = 0x1000; -const int AFTER_STACK = 0x0804c000; -const int ARGV_DATA_SEGMENT = 0x0804e000; +const int CODE_START = 0x08048000; +const int DATA_SEGMENT = 0x08050000; +const int HEAP_SEGMENT = DATA_SEGMENT; +const int STACK_SEGMENT = 0x08060000; +const int AFTER_STACK = 0x08060ffc; // forget final word because of the off-by-one with INITIAL_SEGMENT_SIZE; +const int ARGV_DATA_SEGMENT = 0x08070000; :(code) -void initialize_mem() { - Mem.resize(AFTER_STACK - CODE_START); -} - inline uint32_t u32_in(uint8_t* p) { return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; } diff --git a/subx/013direct_addressing.cc b/subx/013direct_addressing.cc index 7b265a44..45e034ed 100644 --- a/subx/013direct_addressing.cc +++ b/subx/013direct_addressing.cc @@ -555,7 +555,8 @@ put(name, "5f", "pop top of stack to R7 (EDI)"); :(scenario pop_r32) % Reg[ESP].u = 0x60; -% write_mem_i32(0x60, 0x0000000a); +% Mem.push_back(vma(0x1)); // manually allocate memory +% write_mem_i32(0x60, 0x0000000a); // ..before this write == 0x1 # code segment # op ModR/M SIB displacement immediate 5b # pop stack to EBX diff --git a/subx/020syscalls.cc b/subx/020syscalls.cc index c7e3fa47..2940b06c 100644 --- a/subx/020syscalls.cc +++ b/subx/020syscalls.cc @@ -75,7 +75,7 @@ void process_int80() { break; case 45: // brk: modify size of data segment trace(91, "run") << "grow data segment to " << Reg[EBX].u << end(); - resize_mem(/*new end address*/Reg[EBX].u); + grow_data_segment(/*new end address*/Reg[EBX].u); break; default: raise << HEXWORD << EIP << ": unimplemented syscall " << Reg[EAX].u << '\n' << end(); @@ -102,16 +102,3 @@ void check_mode(int reg) { 
exit(1); } } - -void resize_mem(uint32_t new_end_address) { - if (new_end_address < Mem_offset) { - raise << HEXWORD << EIP << ": can't shrink data segment to before code segment\n" << end(); - return; - } - int32_t new_size = new_end_address - Mem_offset; - if (new_size < SIZE(Mem)) { - raise << HEXWORD << EIP << ": shrinking data segment is not supported.\n" << end(); - return; - } - Mem.resize(new_size); // will throw exception on failure -} diff --git a/subx/034compute_segment_address.cc b/subx/034compute_segment_address.cc index f5f383b6..71a18452 100644 --- a/subx/034compute_segment_address.cc +++ b/subx/034compute_segment_address.cc @@ -2,9 +2,7 @@ //: segment. //: This gives up a measure of control in placing code and data. -//: segment address computation requires setting Mem_offset in test mode to what it'll be in run mode :(scenario segment_name) -% Mem_offset = CODE_START; == code 05/add 0x0d0c0b0a/imm32 # add 0x0d0c0b0a to EAX # code starts at 0x08048000 + p_offset, which is 0x54 for a single-segment binary diff --git a/subx/035labels.cc b/subx/035labels.cc index 207b09b1..96668075 100644 --- a/subx/035labels.cc +++ b/subx/035labels.cc @@ -231,7 +231,6 @@ xz: //: ignore them. :(scenario segment_size_ignores_labels) -% Mem_offset = CODE_START; == code # 0x08048074 05/add 0x0d0c0b0a/imm32 # 5 bytes foo: # 0 bytes diff --git a/subx/036global_variables.cc b/subx/036global_variables.cc index 42790c0c..c565014f 100644 --- a/subx/036global_variables.cc +++ b/subx/036global_variables.cc @@ -7,8 +7,6 @@ //: This layer much the same structure as rewriting labels. :(scenario global_variable) -% Mem_offset = CODE_START; -% Mem.resize(0x2000); == code b9/copy x/imm32 # copy to ECX == data @@ -147,11 +145,9 @@ x: #? 
+error: can't call to the data segment ('x') :(scenario disp32_data_with_modrm) -% Mem_offset = CODE_START; -% Mem.resize(0x2000); == code 8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32 -== +== data x: 00 00 00 00 $error: 0 diff --git a/subx/038---literal_strings.cc b/subx/038---literal_strings.cc index 97542f43..b17947c0 100644 --- a/subx/038---literal_strings.cc +++ b/subx/038---literal_strings.cc @@ -4,10 +4,9 @@ //: always be the second segment). :(scenario transform_literal_string) -% Mem_offset = CODE_START; -% Mem.resize(AFTER_STACK - CODE_START); == code b8/copy "test"/imm32 # copy to EAX +== data # need to manually create this for now +transform: -- move literal strings to data segment +transform: adding global variable '__subx_global_1' containing "test" +transform: instruction after transform: 'b8 __subx_global_1' |