From f1b3d7b96749165d771b279b56cc05447b7db3e0 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Thu, 30 Aug 2018 01:15:45 -0700 Subject: 4527 - reading commandline arguments The new example ex9 doesn't yet work natively. In the process I've emulated the kernel's role in providing args, implemented a couple of instructions acting on 8-bit operands (useful for ASCII string operations), and begun the start of the standard library (ascii_length is the same as strlen). At the level of SubX we're just only going to support ASCII. --- subx/012elf.cc | 40 ++++++++++++++++++++++--- subx/013direct_addressing.cc | 7 ----- subx/014indirect_addressing.cc | 61 +++++++++++++++++++++++++++++++++++++- subx/030---operands.cc | 1 + subx/031check_operands.cc | 2 ++ subx/ex9 | Bin 0 -> 147 bytes subx/ex9.subx | 65 +++++++++++++++++++++++++++++++++++++++++ subx/run | 4 +-- 8 files changed, 166 insertions(+), 14 deletions(-) create mode 100755 subx/ex9 create mode 100644 subx/ex9.subx (limited to 'subx') diff --git a/subx/012elf.cc b/subx/012elf.cc index 58d2cde8..78131090 100644 --- a/subx/012elf.cc +++ b/subx/012elf.cc @@ -10,7 +10,7 @@ if (is_equal(argv[1], "run")) { cerr << std::hex; initialize_mem(); Mem_offset = CODE_START; - load_elf(argv[2]); + load_elf(argv[2], argc, argv); while (EIP < End_of_program) // weak final-gasp termination check run_one_instruction(); trace(90, "load") << "executed past end of the world: " << EIP << " vs " << End_of_program << end(); @@ -18,7 +18,7 @@ if (is_equal(argv[1], "run")) { } :(code) -void load_elf(const string& filename) { +void load_elf(const string& filename, int argc, char* argv[]) { int fd = open(filename.c_str(), O_RDONLY); if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die(); off_t size = lseek(fd, 0, SEEK_END); @@ -27,11 +27,11 @@ void load_elf(const string& filename) { if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die(); ssize_t read_size = read(fd, elf_contents, size); if 
(size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die(); - load_elf_contents(elf_contents, size); + load_elf_contents(elf_contents, size, argc, argv); free(elf_contents); } -void load_elf_contents(uint8_t* elf_contents, size_t size) { +void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[]) { uint8_t magic[5] = {0}; memcpy(magic, elf_contents, 4); if (memcmp(magic, "\177ELF", 4) != 0) @@ -65,6 +65,36 @@ void load_elf_contents(uint8_t* elf_contents, size_t size) { Reg[ESP].u = AFTER_STACK; Reg[EBP].u = 0; EIP = e_entry; + + // initialize args on stack + // no envp for now +//? cerr << ARGV_POINTER_SEGMENT << " at " << Reg[ESP].u-4 << '\n'; + push(ARGV_POINTER_SEGMENT); +//? cerr << argc-2 << " at " << Reg[ESP].u-4 << '\n'; + push(argc-/*skip 'subx_bin' and 'run'*/2); + // initialize arg data + // we wastefully use 2 whole pages of memory for this + uint32_t argv_data = ARGV_DATA_SEGMENT; + uint32_t argv_pointers = ARGV_POINTER_SEGMENT; + for (int i = /*skip 'subx_bin' and 'run'*/2; i < argc; ++i) { +//? cerr << "pointer: " << argv_pointers << " => " << argv_data << '\n'; + write_mem_u32(argv_pointers, argv_data); + argv_pointers += sizeof(uint32_t); + assert(argv_pointers < ARGV_POINTER_SEGMENT + SEGMENT_SIZE); + for (size_t j = 0; j <= strlen(argv[i]); ++j) { +//? 
cerr << " data: " << argv[i][j] << " (" << NUM(argv[i][j]) << ")\n"; + write_mem_u8(argv_data, argv[i][j]); + argv_data += sizeof(char); + assert(argv_data < ARGV_DATA_SEGMENT + SEGMENT_SIZE); + } + } +} + +void push(uint32_t val) { + Reg[ESP].u -= 4; + trace(90, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); + trace(90, "run") << "pushing value 0x" << HEXWORD << val << end(); + write_mem_u32(Reg[ESP].u, val); } void load_segment_from_program_header(uint8_t* elf_contents, size_t size, uint32_t offset, uint32_t e_ehsize) { @@ -104,6 +134,8 @@ void load_segment_from_program_header(uint8_t* elf_contents, size_t size, uint32 const int CODE_START = 0x08048000; const int SEGMENT_SIZE = 0x1000; const int AFTER_STACK = 0x0804c000; +const int ARGV_POINTER_SEGMENT = 0x0804d000; +const int ARGV_DATA_SEGMENT = 0x0804e000; :(code) void initialize_mem() { Mem.resize(AFTER_STACK - CODE_START); diff --git a/subx/013direct_addressing.cc b/subx/013direct_addressing.cc index 54cd096f..2211a7a5 100644 --- a/subx/013direct_addressing.cc +++ b/subx/013direct_addressing.cc @@ -367,13 +367,6 @@ case 0x57: { // push r32 to stack push(Reg[reg].u); break; } -:(code) -void push(uint32_t val) { - Reg[ESP].u -= 4; - trace(90, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); - trace(90, "run") << "pushing value 0x" << HEXWORD << val << end(); - write_mem_u32(Reg[ESP].u, val); -} //:: pop diff --git a/subx/014indirect_addressing.cc b/subx/014indirect_addressing.cc index ede192fc..344da8de 100644 --- a/subx/014indirect_addressing.cc +++ b/subx/014indirect_addressing.cc @@ -363,7 +363,7 @@ put(name, "8b", "copy rm32 to r32"); == 0x1 # code segment # op ModR/M SIB displacement immediate 8b 18 # copy *EAX to EBX -# ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) +# ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX) == 0x60 # data segment af 00 00 00 # 0xaf +run: copy r/m32 to EBX @@ -381,6 +381,65 @@ case 0x8b: { // copy r32 
to r/m32 break; } +//: + +:(before "End Initialize Op Names(name)") +put(name, "88", "copy r8 (lowermost byte of r32) to r8/m8-at-r32"); + +:(scenario copy_r8_to_mem_at_r32) +% Reg[EBX].i = 0xafafafaf; +% Reg[EAX].i = 0x60; +== 0x1 +# op ModR/M SIB displacement immediate + 88 18 # copy just the lowermost byte of EBX to the byte at *EAX +# ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX) ++run: copy lowermost byte of EBX to r8/m8-at-r32 ++run: effective address is 0x60 (EAX) ++run: storing 0xaf +% CHECK_EQ(0x000000af, read_mem_u32(0x60)); + +:(before "End Single-Byte Opcodes") +case 0x88: { // copy r8 to r/m8 + uint8_t modrm = next(); + uint8_t reg2 = (modrm>>3)&0x7; + trace(90, "run") << "copy lowermost byte of " << rname(reg2) << " to r8/m8-at-r32" << end(); + // assigning through a uint8_t* truncates the 32-bit register to its lowermost byte + uint8_t* arg1 = reinterpret_cast<uint8_t*>(effective_address(modrm)); + *arg1 = Reg[reg2].u; + trace(90, "run") << "storing 0x" << HEXBYTE << NUM(*arg1) << end(); + break; +} + +//: + +:(before "End Initialize Op Names(name)") +put(name, "8a", "copy r8/m8-at-r32 to r8 (lowermost byte of r32)"); + +:(scenario copy_mem_at_r32_to_r8) +% Reg[EBX].i = 0xaf; +% Reg[EAX].i = 0x60; +== 0x1 +# op ModR/M SIB displacement immediate + 8a 18 # copy just the byte at *EAX to lowermost byte of EBX (clearing remaining bytes) +# ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX) +== 0x60 # data segment +af ff ff ff # 0xaf with more data in following bytes ++run: copy r8/m8-at-r32 to lowermost byte of EBX ++run: effective address is 0x60 (EAX) ++run: storing 0xaf + +:(before "End Single-Byte Opcodes") +case 0x8a: { // copy r/m8 to r8 + uint8_t modrm = next(); + uint8_t reg1 = (modrm>>3)&0x7; + trace(90, "run") << "copy r8/m8-at-r32 to lowermost byte of " << rname(reg1) << end(); + // use unsigned to zero-extend 8-bit value to 32 bits + uint8_t* arg2 = reinterpret_cast<uint8_t*>(effective_address(modrm)); + Reg[reg1].u = static_cast<uint32_t>(*arg2); + trace(90, "run") 
<< "storing 0x" << HEXBYTE << NUM(*arg2) << end(); + break; +} + //:: jump :(before "End Initialize Op Names(name)") diff --git a/subx/030---operands.cc b/subx/030---operands.cc index aa3f2ded..3e103b66 100644 --- a/subx/030---operands.cc +++ b/subx/030---operands.cc @@ -271,6 +271,7 @@ word hex_byte_text(uint8_t val) { string hex_byte_to_string(uint8_t val) { ostringstream out; + // uint8_t prints without padding, but int8_t will expand to 32 bits again out << HEXBYTE << NUM(val); return out.str(); } diff --git a/subx/031check_operands.cc b/subx/031check_operands.cc index 1e840a81..12013287 100644 --- a/subx/031check_operands.cc +++ b/subx/031check_operands.cc @@ -192,7 +192,9 @@ void init_permitted_operands() { put(Permitted_operands, "39", 0x01); put(Permitted_operands, "3b", 0x01); // copy + put(Permitted_operands, "88", 0x01); put(Permitted_operands, "89", 0x01); + put(Permitted_operands, "8a", 0x01); put(Permitted_operands, "8b", 0x01); // swap put(Permitted_operands, "87", 0x01); diff --git a/subx/ex9 b/subx/ex9 new file mode 100755 index 00000000..c7f1e9e6 Binary files /dev/null and b/subx/ex9 differ diff --git a/subx/ex9.subx b/subx/ex9.subx new file mode 100644 index 00000000..b8482c43 --- /dev/null +++ b/subx/ex9.subx @@ -0,0 +1,65 @@ +# Example reading commandline arguments: compute length of first arg. +# +# To run: +# $ subx translate ex9.subx ex9 +# $ subx run ex9 abc d e f g h +# Expected result: +# $ echo $? +# 3 # length of 'abc' +# +# At the start of a SubX program: +# argc: *ESP +# argv: *(ESP+4) +# argv[0]: *argv ("ex9" in this case) +# argv[1]: *(argv+4) +# ... +# Locals start from ESP-4 downwards. 
+ +== 0x08048054 # code segment, after leaving room for ELF header and segment headers +# instruction effective address operand displacement immediate +# op subop mod rm32 base index scale r32 +# 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes + # var s = argv[1] (EBX) + # var s = argv + 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 3/r32/EBX 4/disp8 . # copy *(ESP+4) to EBX + # s = *(s+4) + 8b/copy 1/mod/*+disp8 3/rm32/EBX . . . 3/r32/EBX 4/disp8 . # copy *(EBX+4) to EBX + # call ascii_length(EBX) + # prepare call + 55/push . . . . . . . . # push EBP + 89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP + # push args + 53/push . . . . . . . . # push EBX + # call + e8/call . . . . . . ascii_length/disp32 + # discard args + 5a/pop . . . . . . . . # pop into EDX + # clean up after call + 89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP + 5d/pop . . . . . . . . # pop to EBP + + # exit(EAX) + 89/copy 3/mod/direct 3/rm32/EBX . . . 0/r32/EAX . . # copy EAX to EBX + b8/copy . . . . . . . 1/imm32/exit # copy 1 to EAX + cd/syscall . . . . . . . 0x80/imm8 # int 80h + +ascii_length: # (s) + # initialize s (EDX) + 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 2/r32/EDX 4/disp8 # copy *(ESP+4) to EDX + # var result = 0 (EAX) + b8/copy . . . . . . . 0/imm32 # copy 0 to EAX +$al_loop: + # var c = *s (ECX) + 8a/copy 0/mod/* 2/rm32/EDX . . . 1/r32/ECX . . # copy byte at *EDX to lower byte of ECX + # if c == '\0' break + 81 7/subop/compare 3/mod/direct 1/rm32/ECX . . . . . 0/imm32 # compare ECX with 0 + 74/jump-if-zero . . . . . . . $al_ret/disp8 # jump if equal + # ++s + 81 0/subop/add 3/mod/direct 2/rm32/EDX . . . . . 1/imm32 # add 1 to EDX + # ++result + 81 0/subop/add 3/mod/direct 0/rm32/EAX . . . . . 1/imm32 # add 1 to EAX + # loop + eb/jump . . . . . . . 
$al_loop/disp8 # jump $al_loop +$al_ret: + # return (result in EAX) + c3/return diff --git a/subx/run b/subx/run index cd484787..277ce6d6 100755 --- a/subx/run +++ b/subx/run @@ -3,9 +3,9 @@ if [ $# -eq 0 ] then - echo "run " + echo "run " exit 1 fi -CFLAGS=-g subx run $1 +CFLAGS=-g subx run $* exit $? -- cgit 1.4.1-2-gfad0