about summary refs log tree commit diff stats
path: root/subx
diff options
context:
space:
mode:
author Kartik Agaram <vc@akkartik.com> 2018-08-30 01:15:45 -0700
committer Kartik Agaram <vc@akkartik.com> 2018-08-30 01:15:45 -0700
commit f1b3d7b96749165d771b279b56cc05447b7db3e0 (patch)
tree 0b9ce4b47746827ac9703bdf10d4976c98ae058d /subx
parent 51e3e6cec3aea2589513e946f187d9c4bc8eadb3 (diff)
download mu-f1b3d7b96749165d771b279b56cc05447b7db3e0.tar.gz
4527 - reading commandline arguments
The new example ex9 doesn't yet work natively.

In the process I've emulated the kernel's role in providing args, implemented
a couple of instructions acting on 8-bit operands (useful for ASCII string
operations), and made a start on the standard library (ascii_length
is the same as strlen).

At the level of SubX we're only going to support ASCII.
Diffstat (limited to 'subx')
-rw-r--r-- subx/012elf.cc 40
-rw-r--r-- subx/013direct_addressing.cc 7
-rw-r--r-- subx/014indirect_addressing.cc 61
-rw-r--r-- subx/030---operands.cc 1
-rw-r--r-- subx/031check_operands.cc 2
-rwxr-xr-x subx/ex9 bin 0 -> 147 bytes
-rw-r--r-- subx/ex9.subx 65
-rwxr-xr-x subx/run 4
8 files changed, 166 insertions, 14 deletions
diff --git a/subx/012elf.cc b/subx/012elf.cc
index 58d2cde8..78131090 100644
--- a/subx/012elf.cc
+++ b/subx/012elf.cc
@@ -10,7 +10,7 @@ if (is_equal(argv[1], "run")) {
   cerr << std::hex;
   initialize_mem();
   Mem_offset = CODE_START;
-  load_elf(argv[2]);
+  load_elf(argv[2], argc, argv);
   while (EIP < End_of_program)  // weak final-gasp termination check
     run_one_instruction();
   trace(90, "load") << "executed past end of the world: " << EIP << " vs " << End_of_program << end();
@@ -18,7 +18,7 @@ if (is_equal(argv[1], "run")) {
 }
 
 :(code)
-void load_elf(const string& filename) {
+void load_elf(const string& filename, int argc, char* argv[]) {
   int fd = open(filename.c_str(), O_RDONLY);
   if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die();
   off_t size = lseek(fd, 0, SEEK_END);
@@ -27,11 +27,11 @@ void load_elf(const string& filename) {
   if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die();
   ssize_t read_size = read(fd, elf_contents, size);
   if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die();
-  load_elf_contents(elf_contents, size);
+  load_elf_contents(elf_contents, size, argc, argv);
   free(elf_contents);
 }
 
-void load_elf_contents(uint8_t* elf_contents, size_t size) {
+void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[]) {
   uint8_t magic[5] = {0};
   memcpy(magic, elf_contents, 4);
   if (memcmp(magic, "\177ELF", 4) != 0)
@@ -65,6 +65,36 @@ void load_elf_contents(uint8_t* elf_contents, size_t size) {
   Reg[ESP].u = AFTER_STACK;
   Reg[EBP].u = 0;
   EIP = e_entry;
+
+  // initialize args on stack
+  // no envp for now
+//?   cerr << ARGV_POINTER_SEGMENT << " at " << Reg[ESP].u-4 << '\n';
+  push(ARGV_POINTER_SEGMENT);
+//?   cerr << argc-2 << " at " << Reg[ESP].u-4 << '\n';
+  push(argc-/*skip 'subx_bin' and 'run'*/2);
+  // initialize arg data
+  // we wastefully use 2 whole pages of memory for this
+  uint32_t argv_data = ARGV_DATA_SEGMENT;
+  uint32_t argv_pointers = ARGV_POINTER_SEGMENT;
+  for (int i = /*skip 'subx_bin' and 'run'*/2;  i < argc;  ++i) {
+//?     cerr << "pointer: " << argv_pointers << " => " << argv_data << '\n';
+    write_mem_u32(argv_pointers, argv_data);
+    argv_pointers += sizeof(uint32_t);
+    assert(argv_pointers < ARGV_POINTER_SEGMENT + SEGMENT_SIZE);
+    for (size_t j = 0;  j <= strlen(argv[i]);  ++j) {
+//?       cerr << "  data: " << argv[i][j] << " (" << NUM(argv[i][j]) << ")\n";
+      write_mem_u8(argv_data, argv[i][j]);
+      argv_data += sizeof(char);
+      assert(argv_data < ARGV_DATA_SEGMENT + SEGMENT_SIZE);
+    }
+  }
+}
+
+void push(uint32_t val) {
+  Reg[ESP].u -= 4;
+  trace(90, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end();
+  trace(90, "run") << "pushing value 0x" << HEXWORD << val << end();
+  write_mem_u32(Reg[ESP].u, val);
 }
 
 void load_segment_from_program_header(uint8_t* elf_contents, size_t size, uint32_t offset, uint32_t e_ehsize) {
@@ -104,6 +134,8 @@ void load_segment_from_program_header(uint8_t* elf_contents, size_t size, uint32
 const int CODE_START = 0x08048000;
 const int SEGMENT_SIZE = 0x1000;
 const int AFTER_STACK = 0x0804c000;
+const int ARGV_POINTER_SEGMENT = 0x0804d000;
+const int ARGV_DATA_SEGMENT = 0x0804e000;
 :(code)
 void initialize_mem() {
   Mem.resize(AFTER_STACK - CODE_START);
diff --git a/subx/013direct_addressing.cc b/subx/013direct_addressing.cc
index 54cd096f..2211a7a5 100644
--- a/subx/013direct_addressing.cc
+++ b/subx/013direct_addressing.cc
@@ -367,13 +367,6 @@ case 0x57: {  // push r32 to stack
   push(Reg[reg].u);
   break;
 }
-:(code)
-void push(uint32_t val) {
-  Reg[ESP].u -= 4;
-  trace(90, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end();
-  trace(90, "run") << "pushing value 0x" << HEXWORD << val << end();
-  write_mem_u32(Reg[ESP].u, val);
-}
 
 //:: pop
 
diff --git a/subx/014indirect_addressing.cc b/subx/014indirect_addressing.cc
index ede192fc..344da8de 100644
--- a/subx/014indirect_addressing.cc
+++ b/subx/014indirect_addressing.cc
@@ -363,7 +363,7 @@ put(name, "8b", "copy rm32 to r32");
 == 0x1  # code segment
 # op  ModR/M  SIB   displacement  immediate
   8b  18                                      # copy *EAX to EBX
-# ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX)
+# ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
 == 0x60  # data segment
 af 00 00 00  # 0xaf
 +run: copy r/m32 to EBX
@@ -381,6 +381,65 @@ case 0x8b: {  // copy r32 to r/m32
   break;
 }
 
+//:
+
+:(before "End Initialize Op Names(name)")
+put(name, "88", "copy r8 (lowermost byte of r32) to r8/m8-at-r32");
+
+:(scenario copy_r8_to_mem_at_r32)
+% Reg[EBX].i = 0xafafafaf;
+% Reg[EAX].i = 0x60;
+== 0x1
+# op  ModR/M  SIB   displacement  immediate
+  88  18                                      # copy just the lowermost byte of EBX to the byte at *EAX
+# ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX)
++run: copy lowermost byte of EBX to r8/m8-at-r32
++run: effective address is 0x60 (EAX)
++run: storing 0xaf
+% CHECK_EQ(0x000000af, read_mem_u32(0x60));
+
+:(before "End Single-Byte Opcodes")
+case 0x88: {  // copy r8 to r/m8
+  uint8_t modrm = next();
+  uint8_t reg2 = (modrm>>3)&0x7;
+  trace(90, "run") << "copy lowermost byte of " << rname(reg2) << " to r8/m8-at-r32" << end();
+  // write through a byte pointer so only the lowermost byte of the register is stored
+  uint8_t* arg1 = reinterpret_cast<uint8_t*>(effective_address(modrm));
+  *arg1 = Reg[reg2].u;
+  trace(90, "run") << "storing 0x" << HEXBYTE << NUM(*arg1) << end();
+  break;
+}
+
+//:
+
+:(before "End Initialize Op Names(name)")
+put(name, "8a", "copy r8/m8-at-r32 to r8 (lowermost byte of r32)");
+
+:(scenario copy_mem_at_r32_to_r8)
+% Reg[EBX].i = 0xaf;
+% Reg[EAX].i = 0x60;
+== 0x1
+# op  ModR/M  SIB   displacement  immediate
+  8a  18                                      # copy just the byte at *EAX to lowermost byte of EBX (clearing remaining bytes)
+# ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+== 0x60  # data segment
+af ff ff ff  # 0xaf with more data in following bytes
++run: copy r8/m8-at-r32 to lowermost byte of EBX
++run: effective address is 0x60 (EAX)
++run: storing 0xaf
+
+:(before "End Single-Byte Opcodes")
+case 0x8a: {  // copy r/m8 to r8
+  uint8_t modrm = next();
+  uint8_t reg1 = (modrm>>3)&0x7;
+  trace(90, "run") << "copy r8/m8-at-r32 to lowermost byte of " << rname(reg1) << end();
+  // use unsigned to zero-extend 8-bit value to 32 bits
+  uint8_t* arg2 = reinterpret_cast<uint8_t*>(effective_address(modrm));
+  Reg[reg1].u = static_cast<uint32_t>(*arg2);
+  trace(90, "run") << "storing 0x" << HEXBYTE << NUM(*arg2) << end();
+  break;
+}
+
 //:: jump
 
 :(before "End Initialize Op Names(name)")
diff --git a/subx/030---operands.cc b/subx/030---operands.cc
index aa3f2ded..3e103b66 100644
--- a/subx/030---operands.cc
+++ b/subx/030---operands.cc
@@ -271,6 +271,7 @@ word hex_byte_text(uint8_t val) {
 
 string hex_byte_to_string(uint8_t val) {
   ostringstream out;
+  // uint8_t prints without padding, but int8_t will expand to 32 bits again
   out << HEXBYTE << NUM(val);
   return out.str();
 }
diff --git a/subx/031check_operands.cc b/subx/031check_operands.cc
index 1e840a81..12013287 100644
--- a/subx/031check_operands.cc
+++ b/subx/031check_operands.cc
@@ -192,7 +192,9 @@ void init_permitted_operands() {
   put(Permitted_operands, "39", 0x01);
   put(Permitted_operands, "3b", 0x01);
   // copy
+  put(Permitted_operands, "88", 0x01);
   put(Permitted_operands, "89", 0x01);
+  put(Permitted_operands, "8a", 0x01);
   put(Permitted_operands, "8b", 0x01);
   // swap
   put(Permitted_operands, "87", 0x01);
diff --git a/subx/ex9 b/subx/ex9
new file mode 100755
index 00000000..c7f1e9e6
--- /dev/null
+++ b/subx/ex9
Binary files differ
diff --git a/subx/ex9.subx b/subx/ex9.subx
new file mode 100644
index 00000000..b8482c43
--- /dev/null
+++ b/subx/ex9.subx
@@ -0,0 +1,65 @@
+# Example reading commandline arguments: compute length of first arg.
+#
+# To run:
+#   $ subx translate ex9.subx ex9
+#   $ subx run ex9 abc d e f g h
+# Expected result:
+#   $ echo $?
+#   3  # length of 'abc'
+#
+# At the start of a SubX program:
+#   argc: *ESP
+#   argv: *(ESP+4)
+#   argv[0]: *argv ("ex9" in this case)
+#   argv[1]: *(argv+4)
+#   ...
+# Locals start from ESP-4 downwards.
+
+== 0x08048054  # code segment, after leaving room for ELF header and segment headers
+# instruction                     effective address                                                   operand     displacement    immediate
+# op          subop               mod             rm32          base        index         scale       r32
+# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+  # var s = argv[1] (EBX)
+    # var s = argv
+  8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              3/r32/EBX   4/disp8         .                       # copy *(ESP+4) to EBX
+    # s = *(s+4)
+  8b/copy                         1/mod/*+disp8   3/rm32/EBX    .           .             .           3/r32/EBX   4/disp8         .                       # copy *(EBX+4) to EBX
+  # call ascii_length(EBX)
+    # prepare call
+  55/push                         .               .             .           .             .           .           .               .                       # push EBP
+  89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                       # copy ESP to EBP
+    # push args
+  53/push                         .               .             .           .             .           .           .               .                       # push EBX
+    # call
+  e8/call                         .               .             .           .             .           .           ascii_length/disp32
+    # discard args
+  5a/pop                          .               .             .           .             .           .           .               .                       # pop into EDX
+    # clean up after call
+  89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                       # copy EBP to ESP
+  5d/pop                          .               .             .           .             .           .           .               .                       # pop to EBP
+
+  # exit(EAX)
+  89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                       # copy EAX to EBX
+  b8/copy                         .               .             .           .             .           .           .               1/imm32/exit            # copy 1 to EAX
+  cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+
+ascii_length:  # (s)
+  # initialize s (EDX)
+  8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              2/r32/EDX   4/disp8                                 # copy *(ESP+4) to EDX
+  # var result = 0 (EAX)
+  b8/copy                         .               .             .           .             .           .           .               0/imm32                 # copy 0 to EAX
+$al_loop:
+  # var c = *s (ECX)
+  8a/copy                         0/mod/*         2/rm32/EDX    .           .             .           1/r32/ECX   .               .                       # copy byte at *EDX to lower byte of ECX
+  # if c == '\0' break
+  81          7/subop/compare     3/mod/direct    1/rm32/ECX    .           .             .           .           .               0/imm32                 # compare ECX with 0
+  74/jump-if-zero                 .               .             .           .             .           .           .               $al_ret/disp8           # jump if equal
+  # ++s
+  81          0/subop/add         3/mod/direct    2/rm32/EDX    .           .             .           .           .               1/imm32                 # add 1 to EDX
+  # ++result
+  81          0/subop/add         3/mod/direct    0/rm32/EAX    .           .             .           .           .               1/imm32                 # add 1 to EAX
+  # loop
+  eb/jump                         .               .             .           .             .           .           .               $al_loop/disp8          # jump $al_loop
+$al_ret:
+  # return (result in EAX)
+  c3/return
diff --git a/subx/run b/subx/run
index cd484787..277ce6d6 100755
--- a/subx/run
+++ b/subx/run
@@ -3,9 +3,9 @@
 
 if [ $# -eq 0 ]
 then
-  echo "run <binary>"
+  echo "run <binary> <args>"
   exit 1
 fi
 
-CFLAGS=-g subx run $1
+CFLAGS=-g subx run $*
 exit $?