From 5a2cb154eb016aa8b84ad939f63eb56dd7a0a90f Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Tue, 2 Oct 2018 01:21:01 -0700 Subject: 4649 --- html/subx/001help.cc.html | 20 +- html/subx/003trace.cc.html | 30 +- html/subx/003trace.test.cc.html | 36 +- html/subx/010---vm.cc.html | 454 +++++++++++-------- html/subx/011run.cc.html | 446 ++++++++++--------- html/subx/012elf.cc.html | 314 +++++++------- html/subx/013direct_addressing.cc.html | 251 +++++------ html/subx/014indirect_addressing.cc.html | 290 ++++++------- html/subx/015immediate_addressing.cc.html | 150 +++---- html/subx/016index_addressing.cc.html | 56 +-- html/subx/017jump_disp8.cc.html | 42 +- html/subx/018jump_disp16.cc.html | 32 +- html/subx/019functions.cc.html | 34 +- html/subx/020syscalls.cc.html | 159 +++---- html/subx/028translate.cc.html | 310 +++++++------ html/subx/030---operands.cc.html | 423 +++++++++--------- html/subx/031check_operands.cc.html | 78 ++-- html/subx/032check_operand_bounds.cc.html | 12 +- html/subx/034compute_segment_address.cc.html | 211 ++++++--- html/subx/035labels.cc.html | 483 +++++++++++---------- html/subx/036global_variables.cc.html | 200 +++++---- html/subx/038---literal_strings.cc.html | 420 +++++++++--------- html/subx/040---tests.cc.html | 18 +- html/subx/050write_stderr.subx.html | 97 +++++ html/subx/051test.subx.html | 124 ++++++ html/subx/052kernel_string_equal.subx.html | 320 ++++++++++++++ html/subx/apps/crenshaw2-1.subx.html | 171 ++------ html/subx/apps/factorial.subx.html | 189 ++------ html/subx/examples/ex1.1.subx.html | 79 ---- html/subx/examples/ex1.2.subx.html | 79 ---- html/subx/examples/ex1.subx.html | 79 ++++ html/subx/examples/ex10.subx.html | 105 +++-- html/subx/examples/ex11.subx.html | 627 ++++++++++++++------------- html/subx/examples/ex12.subx.html | 103 +++++ html/subx/examples/ex2.subx.html | 13 +- html/subx/examples/ex3.subx.html | 16 +- html/subx/examples/ex4.subx.html | 20 +- html/subx/examples/ex5.subx.html | 22 +- 
html/subx/examples/ex6.subx.html | 14 +- html/subx/examples/ex7.subx.html | 34 +- html/subx/examples/ex8.subx.html | 32 +- html/subx/examples/ex9.subx.html | 59 ++- 42 files changed, 3688 insertions(+), 2964 deletions(-) create mode 100644 html/subx/050write_stderr.subx.html create mode 100644 html/subx/051test.subx.html create mode 100644 html/subx/052kernel_string_equal.subx.html delete mode 100644 html/subx/examples/ex1.1.subx.html delete mode 100644 html/subx/examples/ex1.2.subx.html create mode 100644 html/subx/examples/ex1.subx.html create mode 100644 html/subx/examples/ex12.subx.html (limited to 'html/subx') diff --git a/html/subx/001help.cc.html b/html/subx/001help.cc.html index aed0d399..63ed444d 100644 --- a/html/subx/001help.cc.html +++ b/html/subx/001help.cc.html @@ -133,13 +133,13 @@ if ('onhashchange' in window) { 73 " subx --help\n" 74 "- Convert a textual SubX program into a standard ELF binary that you can\n" 75 " run on your computer:\n" - 76 " subx translate <input 'source' file> <output ELF binary>\n" + 76 " subx translate input1.subx intput2.subx ... 
-o <output ELF binary>\n" 77 "- Run a SubX binary using SubX itself (for better error messages):\n" 78 " subx run <ELF binary>\n" 79 "Add '--trace' to any of these commands to also emit a trace, for debugging purposes.\n" 80 "However, options starting with '--' must always come before any other arguments.\n" 81 "\n" - 82 "To start learning how to write SubX programs, run:\n" + 82 "To start learning how to write SubX programs, run:\n" 83 " subx help\n" 84 ); 85 // End Help Texts @@ -152,9 +152,9 @@ if ('onhashchange' in window) { 92 93 bool starts_with(const string& s, const string& pat) { 94 string::const_iterator a=s.begin(), b=pat.begin(); - 95 for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) + 95 for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) 96 if (*a != *b) return false; - 97 return b == pat.end(); + 97 return b == pat.end(); 98 } 99 100 //: I'll throw some style conventions here for want of a better place for them. @@ -206,7 +206,7 @@ if ('onhashchange' in window) { 146 //: yadda-yadda. Instead use this macro below to perform an unsafe cast to 147 //: signed. We'll just give up immediately if a container's ever too large. 148 //: Basically, Mu is not concerned about this being a little slower than it -149 //: could be. (https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7) +149 //: could be. 
(https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7) 150 //: 151 //: Addendum to corollary: We're going to uniformly use int everywhere, to 152 //: indicate that we're oblivious to number size, and since Clang on 32-bit @@ -286,17 +286,17 @@ if ('onhashchange' in window) { 226 // from http://stackoverflow.com/questions/152643/idiomatic-c-for-reading-from-a-const-map 227 template<typename T> typename T::mapped_type& get(T& map, typename T::key_type const& key) { 228 typename T::iterator iter(map.find(key)); -229 if (iter == map.end()) { +229 if (iter == map.end()) { 230 cerr << "get couldn't find key '" << key << "'\n"; -231 assert(iter != map.end()); +231 assert(iter != map.end()); 232 } 233 return iter->second; 234 } 235 template<typename T> typename T::mapped_type const& get(const T& map, typename T::key_type const& key) { 236 typename T::const_iterator iter(map.find(key)); -237 if (iter == map.end()) { +237 if (iter == map.end()) { 238 cerr << "get couldn't find key '" << key << "'\n"; -239 assert(iter != map.end()); +239 assert(iter != map.end()); 240 } 241 return iter->second; 242 } @@ -305,7 +305,7 @@ if ('onhashchange' in window) { 245 return map[key]; 246 } 247 template<typename T> bool contains_key(T& map, typename T::key_type const& key) { -248 return map.find(key) != map.end(); +248 return map.find(key) != map.end(); 249 } 250 template<typename T> typename T::mapped_type& get_or_insert(T& map, typename T::key_type const& key) { 251 return map[key]; diff --git a/html/subx/003trace.cc.html b/html/subx/003trace.cc.html index 25ba9b61..fb544db8 100644 --- a/html/subx/003trace.cc.html +++ b/html/subx/003trace.cc.html @@ -223,7 +223,7 @@ if ('onhashchange' in window) { 164 string trace_stream::readable_contents(string label) { 165 ostringstream output; 166 label = trim(label); -167 for (vector<trace_line>::iterator p = past_lines.begin(); p != past_lines.end(); ++p) +167 for (vector<trace_line>::iterator p = past_lines.begin(); p != 
past_lines.end(); ++p) 168 if (label.empty() || label == p->label) { 169 output << std::setw(4) << p->depth << ' ' << p->label << ": " << p->contents << '\n'; 170 } @@ -286,9 +286,9 @@ if ('onhashchange' in window) { 227 } 228 229 :(before "End Types") -230 struct end {}; +230 struct end {}; 231 :(code) -232 ostream& operator<<(ostream& os, end /*unused*/) { +232 ostream& operator<<(ostream& os, end /*unused*/) { 233 if (Trace_stream) Trace_stream->newline(); 234 return os; 235 } @@ -347,7 +347,7 @@ if ('onhashchange' in window) { 288 if (curr_expected_line == SIZE(expected_lines)) return true; 289 string label, contents; 290 split_label_contents(expected_lines.at(curr_expected_line), &label, &contents); -291 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { +291 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { 292 if (label != p->label) continue; 293 if (contents != trim(p->contents)) continue; 294 ++curr_expected_line; @@ -383,7 +383,7 @@ if ('onhashchange' in window) { 324 } 325 326 bool line_exists_anywhere(const string& label, const string& contents) { -327 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { +327 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { 328 if (label != p->label) continue; 329 if (contents == trim(p->contents)) return true; 330 } @@ -397,7 +397,7 @@ if ('onhashchange' in window) { 338 int trace_count(string label, string line) { 339 if (!Trace_stream) return 0; 340 long result = 0; -341 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { +341 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { 342 if (label == p->label) { 343 if (line == "" || 
trim(line) == trim(p->contents)) 344 ++result; @@ -409,7 +409,7 @@ if ('onhashchange' in window) { 350 int trace_count_prefix(string label, string prefix) { 351 if (!Trace_stream) return 0; 352 long result = 0; -353 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { +353 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { 354 if (label == p->label) { 355 if (starts_with(trim(p->contents), trim(prefix))) 356 ++result; @@ -425,7 +425,7 @@ if ('onhashchange' in window) { 366 bool trace_doesnt_contain(string expected) { 367 vector<string> tmp = split_first(expected, ": "); 368 if (SIZE(tmp) == 1) { -369 raise << expected << ": missing label or contents in trace line\n" << end(); +369 raise << expected << ": missing label or contents in trace line\n" << end(); 370 assert(false); 371 } 372 return trace_doesnt_contain(tmp.at(0), tmp.at(1)); @@ -435,13 +435,13 @@ if ('onhashchange' in window) { 376 vector<string> result; 377 size_t begin=0, end=s.find(delim); 378 while (true) { -379 if (end == string::npos) { +379 if (end == string::npos) { 380 result.push_back(string(s, begin, string::npos)); 381 break; 382 } 383 result.push_back(string(s, begin, end-begin)); 384 begin = end+SIZE(delim); -385 end = s.find(delim, begin); +385 end = s.find(delim, begin); 386 } 387 return result; 388 } @@ -449,19 +449,19 @@ if ('onhashchange' in window) { 390 vector<string> split_first(string s, string delim) { 391 vector<string> result; 392 size_t end=s.find(delim); -393 result.push_back(string(s, 0, end)); -394 if (end != string::npos) +393 result.push_back(string(s, 0, end)); +394 if (end != string::npos) 395 result.push_back(string(s, end+SIZE(delim), string::npos)); 396 return result; 397 } 398 399 string trim(const string& s) { 400 string::const_iterator first = s.begin(); -401 while (first != s.end() && isspace(*first)) +401 while (first != s.end() && 
isspace(*first)) 402 ++first; -403 if (first == s.end()) return ""; +403 if (first == s.end()) return ""; 404 -405 string::const_iterator last = --s.end(); +405 string::const_iterator last = --s.end(); 406 while (last != s.begin() && isspace(*last)) 407 --last; 408 ++last; diff --git a/html/subx/003trace.test.cc.html b/html/subx/003trace.test.cc.html index 7315ab55..9a2d980c 100644 --- a/html/subx/003trace.test.cc.html +++ b/html/subx/003trace.test.cc.html @@ -57,66 +57,66 @@ if ('onhashchange' in window) {
   1 void test_trace_check_compares() {
-  2   trace("test layer") << "foo" << end();
+  2   trace("test layer") << "foo" << end();
   3   CHECK_TRACE_CONTENTS("test layer: foo");
   4 }
   5 
   6 void test_trace_check_ignores_other_layers() {
-  7   trace("test layer 1") << "foo" << end();
-  8   trace("test layer 2") << "bar" << end();
+  7   trace("test layer 1") << "foo" << end();
+  8   trace("test layer 2") << "bar" << end();
   9   CHECK_TRACE_CONTENTS("test layer 1: foo");
  10   CHECK_TRACE_DOESNT_CONTAIN("test layer 2: foo");
  11 }
  12 
  13 void test_trace_check_ignores_leading_whitespace() {
- 14   trace("test layer 1") << " foo" << end();
+ 14   trace("test layer 1") << " foo" << end();
  15   CHECK_EQ(trace_count("test layer 1", /*too little whitespace*/"foo"), 1);
  16   CHECK_EQ(trace_count("test layer 1", /*too much whitespace*/"  foo"), 1);
  17 }
  18 
  19 void test_trace_check_ignores_other_lines() {
- 20   trace("test layer 1") << "foo" << end();
- 21   trace("test layer 1") << "bar" << end();
+ 20   trace("test layer 1") << "foo" << end();
+ 21   trace("test layer 1") << "bar" << end();
  22   CHECK_TRACE_CONTENTS("test layer 1: foo");
  23 }
  24 
  25 void test_trace_check_ignores_other_lines2() {
- 26   trace("test layer 1") << "foo" << end();
- 27   trace("test layer 1") << "bar" << end();
+ 26   trace("test layer 1") << "foo" << end();
+ 27   trace("test layer 1") << "bar" << end();
  28   CHECK_TRACE_CONTENTS("test layer 1: bar");
  29 }
  30 
  31 void test_trace_ignores_trailing_whitespace() {
- 32   trace("test layer 1") << "foo\n" << end();
+ 32   trace("test layer 1") << "foo\n" << end();
  33   CHECK_TRACE_CONTENTS("test layer 1: foo");
  34 }
  35 
  36 void test_trace_ignores_trailing_whitespace2() {
- 37   trace("test layer 1") << "foo " << end();
+ 37   trace("test layer 1") << "foo " << end();
  38   CHECK_TRACE_CONTENTS("test layer 1: foo");
  39 }
  40 
  41 void test_trace_orders_across_layers() {
- 42   trace("test layer 1") << "foo" << end();
- 43   trace("test layer 2") << "bar" << end();
- 44   trace("test layer 1") << "qux" << end();
+ 42   trace("test layer 1") << "foo" << end();
+ 43   trace("test layer 2") << "bar" << end();
+ 44   trace("test layer 1") << "qux" << end();
  45   CHECK_TRACE_CONTENTS("test layer 1: foo^Dtest layer 2: bar^Dtest layer 1: qux^D");
  46 }
  47 
  48 void test_trace_supports_count() {
- 49   trace("test layer 1") << "foo" << end();
- 50   trace("test layer 1") << "foo" << end();
+ 49   trace("test layer 1") << "foo" << end();
+ 50   trace("test layer 1") << "foo" << end();
  51   CHECK_EQ(trace_count("test layer 1", "foo"), 2);
  52 }
  53 
  54 void test_trace_supports_count2() {
- 55   trace("test layer 1") << "foo" << end();
- 56   trace("test layer 1") << "bar" << end();
+ 55   trace("test layer 1") << "foo" << end();
+ 56   trace("test layer 1") << "bar" << end();
  57   CHECK_EQ(trace_count("test layer 1"), 2);
  58 }
  59 
  60 void test_trace_count_ignores_trailing_whitespace() {
- 61   trace("test layer 1") << "foo\n" << end();
+ 61   trace("test layer 1") << "foo\n" << end();
  62   CHECK_EQ(trace_count("test layer 1", "foo"), 1);
  63 }
  64 
diff --git a/html/subx/010---vm.cc.html b/html/subx/010---vm.cc.html
index 86ac7aec..ad6efb36 100644
--- a/html/subx/010---vm.cc.html
+++ b/html/subx/010---vm.cc.html
@@ -131,7 +131,7 @@ if ('onhashchange' in window) {
  70   /* arg1 and arg2 must be signed */ \
  71   int64_t tmp = arg1 op arg2; \
  72   arg1 = arg1 op arg2; \
- 73   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
+ 73   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  74   SF = (arg1 < 0); \
  75   ZF = (arg1 == 0); \
  76   OF = (arg1 != tmp); \
@@ -142,7 +142,7 @@ if ('onhashchange' in window) {
  81 #define BINARY_BITWISE_OP(op, arg1, arg2) { \
  82   /* arg1 and arg2 must be unsigned */ \
  83   arg1 = arg1 op arg2; \
- 84   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
+ 84   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  85   SF = (arg1 >> 31); \
  86   ZF = (arg1 == 0); \
  87   OF = false; \
@@ -150,178 +150,286 @@ if ('onhashchange' in window) {
  89 
  90 //:: simulated RAM
  91 
- 92 :(before "End Globals")
- 93 vector<uint8_t> Mem;
- 94 uint32_t Mem_offset = 0;
- 95 uint32_t End_of_program = 0;
- 96 :(before "End Reset")
- 97 Mem.clear();
- 98 Mem.resize(1024);
- 99 Mem_offset = 0;
-100 End_of_program = 0;
-101 :(code)
-102 // These helpers depend on Mem being laid out contiguously (so you can't use a
-103 // map, etc.) and on the host also being little-endian.
-104 inline uint8_t read_mem_u8(uint32_t addr) {
-105   return Mem.at(addr-Mem_offset);
-106 }
-107 inline int8_t read_mem_i8(uint32_t addr) {
-108   return static_cast<int8_t>(Mem.at(addr-Mem_offset));
-109 }
-110 inline uint32_t read_mem_u32(uint32_t addr) {
-111   return *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
-112 }
-113 inline int32_t read_mem_i32(uint32_t addr) {
-114   return *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
-115 }
-116 
-117 inline uint8_t* mem_addr_u8(uint32_t addr) {
-118   return &Mem.at(addr-Mem_offset);
-119 }
-120 inline int8_t* mem_addr_i8(uint32_t addr) {
-121   return reinterpret_cast<int8_t*>(&Mem.at(addr-Mem_offset));
-122 }
-123 inline char* mem_addr_string(uint32_t addr) {
-124   return reinterpret_cast<char*>(&Mem.at(addr-Mem_offset));
-125 }
-126 inline uint32_t* mem_addr_u32(uint32_t addr) {
-127   return reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
-128 }
-129 inline int32_t* mem_addr_i32(uint32_t addr) {
-130   return reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
-131 }
-132 
-133 inline void write_mem_u8(uint32_t addr, uint8_t val) {
-134   Mem.at(addr-Mem_offset) = val;
-135 }
-136 inline void write_mem_i8(uint32_t addr, int8_t val) {
-137   Mem.at(addr-Mem_offset) = static_cast<uint8_t>(val);
-138 }
-139 inline void write_mem_u32(uint32_t addr, uint32_t val) {
-140   *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset)) = val;
-141 }
-142 inline void write_mem_i32(uint32_t addr, int32_t val) {
-143   *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset)) = val;
-144 }
-145 
-146 //:: core interpreter loop
-147 
-148 :(code)
-149 // skeleton of how x86 instructions are decoded
-150 void run_one_instruction() {
-151   uint8_t op=0, op2=0, op3=0;
-152   trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
-153 //?   dump_registers();
-154 //?   cerr << "inst: 0x" << EIP << " => ";
-155   op = next();
-156 //?   cerr << HEXBYTE << NUM(op) << '\n';
-157   switch (op) {
-158   case 0xf4:  // hlt
-159     EIP = End_of_program;
-160     break;
-161   // End Single-Byte Opcodes
-162   case 0x0f:
-163     switch(op2 = next()) {
-164     // End Two-Byte Opcodes Starting With 0f
-165     default:
-166       cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
-167       DUMP("");
-168       exit(1);
-169     }
-170     break;
-171   case 0xf2:
-172     switch(op2 = next()) {
-173     // End Two-Byte Opcodes Starting With f2
-174     case 0x0f:
-175       switch(op3 = next()) {
-176       // End Three-Byte Opcodes Starting With f2 0f
-177       default:
-178         cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
-179         DUMP("");
-180         exit(1);
-181       }
-182       break;
-183     default:
-184       cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
-185       DUMP("");
-186       exit(1);
-187     }
-188     break;
-189   case 0xf3:
-190     switch(op2 = next()) {
-191     // End Two-Byte Opcodes Starting With f3
-192     case 0x0f:
-193       switch(op3 = next()) {
-194       // End Three-Byte Opcodes Starting With f3 0f
-195       default:
-196         cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
-197         DUMP("");
-198         exit(1);
-199       }
-200       break;
-201     default:
-202       cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
-203       DUMP("");
-204       exit(1);
-205     }
-206     break;
-207   default:
-208     cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
-209     DUMP("");
-210     exit(1);
-211   }
-212 }
-213 
-214 inline uint8_t next() {
-215   return read_mem_u8(EIP++);
+ 92 :(before "End Types")
+ 93 const uint32_t INITIAL_SEGMENT_SIZE = 0x1000 - 1;
+ 94 // Subtract one just so we can start the first segment at address 1 without
+ 95 // overflowing the first segment. Other segments will learn to adjust.
+ 96 
+ 97 // Like in real-world Linux, we'll allocate RAM for our programs in disjoint
+ 98 // slabs called VMAs or Virtual Memory Areas.
+ 99 struct vma {
+100   uint32_t start;  // inclusive
+101   uint32_t end;  // exclusive
+102   vector<uint8_t> _data;
+103   vma(uint32_t s, uint32_t e) :start(s), end(e) {
+104     _data.resize(end-start);
+105   }
+106   vma(uint32_t s) :start(s), end(s+INITIAL_SEGMENT_SIZE) {
+107     _data.resize(end-start);
+108   }
+109   bool match(uint32_t a) {
+110     return a >= start && a < end;
+111   }
+112   bool match32(uint32_t a) {
+113     return a >= start && a+4 <= end;
+114   }
+115   uint8_t& data(uint32_t a) {
+116     assert(match(a));
+117     return _data.at(a-start);
+118   }
+119   void grow_until(uint32_t new_end_address) {
+120     if (new_end_address < end) return;
+121     // Ugly: vma knows about the global Memory list of vmas
+122     void sanity_check(uint32_t start, uint32_t end);
+123     sanity_check(start, new_end_address);
+124     end = new_end_address;
+125     _data.resize(new_end_address - start);
+126   }
+127   // End vma Methods
+128 };
+129 :(code)
+130 void sanity_check(uint32_t start, uint32_t end) {
+131   bool dup_found = false;
+132   for (int i = 0;  i < SIZE(Mem);  ++i) {
+133     const vma& curr = Mem.at(i);
+134     if (curr.start == start) {
+135       assert(!dup_found);
+136       dup_found = true;
+137     }
+138     else if (curr.start > start) {
+139       assert(curr.start > end);
+140     }
+141     else if (curr.start < start) {
+142       assert(curr.end < start);
+143     }
+144   }
+145 }
+146 
+147 :(before "End Globals")
+148 // RAM is made of VMAs.
+149 vector<vma> Mem;
+150 :(code)
+151 // The first 3 VMAs are special. When loading ELF binaries in later layers,
+152 // we'll assume that the first VMA is for code, the second is for data
+153 // (including the heap), and the third for the stack.
+154 void grow_code_segment(uint32_t new_end_address) {
+155   assert(!Mem.empty());
+156   Mem.at(0).grow_until(new_end_address);
+157 }
+158 void grow_data_segment(uint32_t new_end_address) {
+159   assert(SIZE(Mem) > 1);
+160   Mem.at(1).grow_until(new_end_address);
+161 }
+162 :(before "End Globals")
+163 uint32_t End_of_program = 0;  // when the program executes past this address in tests we'll stop the test
+164 // The stack grows downward. Can't increase its size for now.
+165 :(before "End Reset")
+166 Mem.clear();
+167 End_of_program = 0;
+168 :(code)
+169 // These helpers depend on Mem being laid out contiguously (so you can't use a
+170 // map, etc.) and on the host also being little-endian.
+171 inline uint8_t read_mem_u8(uint32_t addr) {
+172   uint8_t* handle = mem_addr_u8(addr);  // error messages get printed here
+173   return handle ? *handle : 0;
+174 }
+175 inline int8_t read_mem_i8(uint32_t addr) {
+176   return static_cast<int8_t>(read_mem_u8(addr));
+177 }
+178 inline uint32_t read_mem_u32(uint32_t addr) {
+179   uint32_t* handle = mem_addr_u32(addr);  // error messages get printed here
+180   return handle ? *handle : 0;
+181 }
+182 inline int32_t read_mem_i32(uint32_t addr) {
+183   return static_cast<int32_t>(read_mem_u32(addr));
+184 }
+185 
+186 inline uint8_t* mem_addr_u8(uint32_t addr) {
+187   uint8_t* result = NULL;
+188   for (int i = 0;  i < SIZE(Mem);  ++i) {
+189     if (Mem.at(i).match(addr)) {
+190       if (result)
+191         raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
+192       result = &Mem.at(i).data(addr);
+193     }
+194   }
+195   if (result == NULL)
+196     raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
+197   return result;
+198 }
+199 inline int8_t* mem_addr_i8(uint32_t addr) {
+200   return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
+201 }
+202 inline uint32_t* mem_addr_u32(uint32_t addr) {
+203   uint32_t* result = NULL;
+204   for (int i = 0;  i < SIZE(Mem);  ++i) {
+205     if (Mem.at(i).match32(addr)) {
+206       if (result)
+207         raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
+208       result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
+209     }
+210   }
+211   if (result == NULL) {
+212     raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
+213     raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
+214   }
+215   return result;
 216 }
-217 
-218 void dump_registers() {
-219   for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
-220     if (i > 0) cerr << "; ";
-221     cerr << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
-222   }
-223   cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
-224 }
-225 
-226 //: start tracking supported opcodes
-227 :(before "End Globals")
-228 map</*op*/string, string> name;
-229 map</*op*/string, string> name_0f;
-230 map</*op*/string, string> name_f3;
-231 map</*op*/string, string> name_f3_0f;
-232 :(before "End One-time Setup")
-233 init_op_names();
-234 :(code)
-235 void init_op_names() {
-236   put(name, "f4", "halt");
-237   // End Initialize Op Names(name)
-238 }
-239 
-240 :(before "End Help Special-cases(key)")
-241 if (key == "opcodes") {
-242   cerr << "Opcodes currently supported by SubX:\n";
-243   for (map<string, string>::iterator p = name.begin();  p != name.end();  ++p)
-244     cerr << "  " << p->first << ": " << p->second << '\n';
-245   for (map<string, string>::iterator p = name_0f.begin();  p != name_0f.end();  ++p)
-246     cerr << "  0f " << p->first << ": " << p->second << '\n';
-247   for (map<string, string>::iterator p = name_f3.begin();  p != name_f3.end();  ++p)
-248     cerr << "  f3 " << p->first << ": " << p->second << '\n';
-249   for (map<string, string>::iterator p = name_f3_0f.begin();  p != name_f3_0f.end();  ++p)
-250     cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
-251   cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
-252   return 0;
-253 }
-254 :(before "End Help Contents")
-255 cerr << "  opcodes\n";
-256 
-257 :(before "End Includes")
-258 #include <iomanip>
-259 #define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
-260 #define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
-261 // ugly that iostream doesn't print uint8_t as an integer
-262 #define NUM(X) static_cast<int>(X)
-263 #include <stdint.h>
+217 inline int32_t* mem_addr_i32(uint32_t addr) {
+218   return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
+219 }
+220 // helper for some syscalls. But read-only.
+221 inline const char* mem_addr_string(uint32_t addr) {
+222   return reinterpret_cast<const char*>(mem_addr_u8(addr));
+223 }
+224 
+225 inline void write_mem_u8(uint32_t addr, uint8_t val) {
+226   uint8_t* handle = mem_addr_u8(addr);
+227   if (handle != NULL) *handle = val;
+228 }
+229 inline void write_mem_i8(uint32_t addr, int8_t val) {
+230   int8_t* handle = mem_addr_i8(addr);
+231   if (handle != NULL) *handle = val;
+232 }
+233 inline void write_mem_u32(uint32_t addr, uint32_t val) {
+234   uint32_t* handle = mem_addr_u32(addr);
+235   if (handle != NULL) *handle = val;
+236 }
+237 inline void write_mem_i32(uint32_t addr, int32_t val) {
+238   int32_t* handle = mem_addr_i32(addr);
+239   if (handle != NULL) *handle = val;
+240 }
+241 
+242 inline bool already_allocated(uint32_t addr) {
+243   bool result = false;
+244   for (int i = 0;  i < SIZE(Mem);  ++i) {
+245     if (Mem.at(i).match(addr)) {
+246       if (result)
+247         raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
+248       result = true;
+249     }
+250   }
+251   return result;
+252 }
+253 
+254 //:: core interpreter loop
+255 
+256 :(code)
+257 // skeleton of how x86 instructions are decoded
+258 void run_one_instruction() {
+259   uint8_t op=0, op2=0, op3=0;
+260   trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
+261 //?   dump_registers();
+262 //?   cerr << "inst: 0x" << EIP << " => ";
+263   op = next();
+264 //?   cerr << HEXBYTE << NUM(op) << '\n';
+265   switch (op) {
+266   case 0xf4:  // hlt
+267     EIP = End_of_program;
+268     break;
+269   // End Single-Byte Opcodes
+270   case 0x0f:
+271     switch(op2 = next()) {
+272     // End Two-Byte Opcodes Starting With 0f
+273     default:
+274       cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
+275       DUMP("");
+276       exit(1);
+277     }
+278     break;
+279   case 0xf2:
+280     switch(op2 = next()) {
+281     // End Two-Byte Opcodes Starting With f2
+282     case 0x0f:
+283       switch(op3 = next()) {
+284       // End Three-Byte Opcodes Starting With f2 0f
+285       default:
+286         cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
+287         DUMP("");
+288         exit(1);
+289       }
+290       break;
+291     default:
+292       cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
+293       DUMP("");
+294       exit(1);
+295     }
+296     break;
+297   case 0xf3:
+298     switch(op2 = next()) {
+299     // End Two-Byte Opcodes Starting With f3
+300     case 0x0f:
+301       switch(op3 = next()) {
+302       // End Three-Byte Opcodes Starting With f3 0f
+303       default:
+304         cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
+305         DUMP("");
+306         exit(1);
+307       }
+308       break;
+309     default:
+310       cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
+311       DUMP("");
+312       exit(1);
+313     }
+314     break;
+315   default:
+316     cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
+317     DUMP("");
+318     exit(1);
+319   }
+320 }
+321 
+322 inline uint8_t next() {
+323   return read_mem_u8(EIP++);
+324 }
+325 
+326 void dump_registers() {
+327   for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
+328     if (i > 0) cerr << "; ";
+329     cerr << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
+330   }
+331   cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
+332 }
+333 
+334 //: start tracking supported opcodes
+335 :(before "End Globals")
+336 map</*op*/string, string> name;
+337 map</*op*/string, string> name_0f;
+338 map</*op*/string, string> name_f3;
+339 map</*op*/string, string> name_f3_0f;
+340 :(before "End One-time Setup")
+341 init_op_names();
+342 :(code)
+343 void init_op_names() {
+344   put(name, "f4", "halt");
+345   // End Initialize Op Names(name)
+346 }
+347 
+348 :(before "End Help Special-cases(key)")
+349 if (key == "opcodes") {
+350   cerr << "Opcodes currently supported by SubX:\n";
+351   for (map<string, string>::iterator p = name.begin();  p != name.end();  ++p)
+352     cerr << "  " << p->first << ": " << p->second << '\n';
+353   for (map<string, string>::iterator p = name_0f.begin();  p != name_0f.end();  ++p)
+354     cerr << "  0f " << p->first << ": " << p->second << '\n';
+355   for (map<string, string>::iterator p = name_f3.begin();  p != name_f3.end();  ++p)
+356     cerr << "  f3 " << p->first << ": " << p->second << '\n';
+357   for (map<string, string>::iterator p = name_f3_0f.begin();  p != name_f3_0f.end();  ++p)
+358     cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
+359   cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
+360   return 0;
+361 }
+362 :(before "End Help Contents")
+363 cerr << "  opcodes\n";
+364 
+365 :(before "End Includes")
+366 #include <iomanip>
+367 #define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
+368 #define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
+369 // ugly that iostream doesn't print uint8_t as an integer
+370 #define NUM(X) static_cast<int>(X)
+371 #include <stdint.h>
 
diff --git a/html/subx/011run.cc.html b/html/subx/011run.cc.html index c91c7fb0..f4049fcf 100644 --- a/html/subx/011run.cc.html +++ b/html/subx/011run.cc.html @@ -72,7 +72,7 @@ if ('onhashchange' in window) { 9 "Line-endings are significant; each line should contain a single\n" 10 "instruction, macro or directive.\n" 11 "\n" - 12 "Comments start with the '#' character. It should be at the start of a word\n" + 12 "Comments start with the '#' character. It should be at the start of a word\n" 13 "(start of line, or following a space).\n" 14 "\n" 15 "Each segment starts with a header line: a '==' delimiter followed by the\n" @@ -81,8 +81,8 @@ if ('onhashchange' in window) { 18 "The starting address for a segment has some finicky requirements. But just\n" 19 "start with a round number, and `subx` will try to guide you to a valid\n" 20 "configuration.\n" - 21 "A good rule of thumb is to try to start the first segment at the default\n" - 22 "address of 0x08048000, and to start each subsequent segment at least 0x1000\n" + 21 "A good rule of thumb is to try to start the first segment at the default\n" + 22 "address of 0x08048000, and to start each subsequent segment at least 0x1000\n" 23 "(most common page size) bytes after the last.\n" 24 "If a segment occupies than 0x1000 bytes you'll need to push subsequent\n" 25 "segments further down.\n" @@ -146,14 +146,14 @@ if ('onhashchange' in window) { 83 void run(const string& text_bytes) { 84 program p; 85 istringstream in(text_bytes); - 86 parse(in, p); + 86 parse(in, p); 87 if (trace_contains_errors()) return; // if any stage raises errors, stop immediately - 88 transform(p); + 88 transform(p); 89 if (trace_contains_errors()) return; - 90 load(p); + 90 load(p); 91 if (trace_contains_errors()) return; - 92 while (EIP < End_of_program) - 93 run_one_instruction(); + 92 while (EIP < End_of_program) + 93 run_one_instruction(); 94 } 95 96 //:: core data structures @@ -168,11 +168,11 @@ if ('onhashchange' in window) { 105 }; 106 
:(before "struct program") 107 struct segment { -108 uint32_t start; +108 uint32_t start; 109 vector<line> lines; 110 // End segment Fields 111 segment() { -112 start = 0; +112 start = 0; 113 // End segment Constructor 114 } 115 }; @@ -185,22 +185,22 @@ if ('onhashchange' in window) { 122 :(before "struct line") 123 struct word { 124 string original; -125 string data; +125 string data; 126 vector<string> metadata; 127 }; 128 129 //:: parse 130 131 :(code) -132 void parse(istream& fin, program& out) { +132 void parse(istream& fin, program& out) { 133 vector<line> l; -134 trace(99, "parse") << "begin" << end(); +134 trace(99, "parse") << "begin" << end(); 135 while (has_data(fin)) { 136 string line_data; 137 line curr; 138 getline(fin, line_data); 139 curr.original = line_data; -140 trace(99, "parse") << "line: " << line_data << end(); +140 trace(99, "parse") << "line: " << line_data << end(); 141 // End Line Parsing Special-cases(line_data -> l) 142 istringstream lin(line_data); 143 while (has_data(lin)) { @@ -210,19 +210,19 @@ if ('onhashchange' in window) { 147 if (word_data[0] == '#') break; // comment 148 if (word_data == ".") continue; // comment token 149 if (word_data == "==") { -150 if (!l.empty()) { -151 assert(!out.segments.empty()); -152 trace(99, "parse") << "flushing to segment" << end(); -153 out.segments.back().lines.swap(l); -154 } -155 segment s; -156 string segment_title; -157 lin >> segment_title; -158 if (starts_with(segment_title, "0x")) -159 s.start = parse_int(segment_title); -160 trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); -161 out.segments.push_back(s); -162 // todo? 
+150 flush(out, l); +151 string segment_title; +152 lin >> segment_title; +153 if (starts_with(segment_title, "0x")) { +154 segment s; +155 s.start = parse_int(segment_title); +156 sanity_check_program_segment(out, s.start); +157 if (trace_contains_errors()) continue; +158 trace(99, "parse") << "new segment from 0x" << HEXWORD << s.start << end(); +159 out.segments.push_back(s); +160 } +161 // End Segment Parsing Special-cases(segment_title) +162 // todo: segment segment metadata 163 break; // skip rest of line 164 } 165 if (word_data[0] == ':') { @@ -230,190 +230,228 @@ if ('onhashchange' in window) { 167 break; 168 } 169 curr.words.push_back(word()); -170 parse_word(word_data, curr.words.back()); +170 parse_word(word_data, curr.words.back()); 171 trace(99, "parse") << "word: " << to_string(curr.words.back()); 172 } 173 if (!curr.words.empty()) 174 l.push_back(curr); 175 } -176 if (!l.empty()) { -177 assert(!out.segments.empty()); -178 trace(99, "parse") << "flushing to segment" << end(); -179 out.segments.back().lines.swap(l); -180 } -181 trace(99, "parse") << "done" << end(); -182 } -183 -184 void parse_word(const string& data, word& out) { -185 out.original = data; -186 istringstream win(data); -187 if (getline(win, out.data, '/')) { -188 string m; -189 while (getline(win, m, '/')) -190 out.metadata.push_back(m); -191 } -192 } -193 -194 string to_string(const word& w) { -195 ostringstream out; -196 out << w.data; -197 for (int i = 0; i < SIZE(w.metadata); ++i) -198 out << " /" << w.metadata.at(i); -199 return out.str(); -200 } -201 -202 //:: transform -203 -204 :(before "End Types") -205 typedef void (*transform_fn)(program&); -206 :(before "End Globals") -207 vector<transform_fn> Transform; -208 -209 void transform(program& p) { -210 trace(99, "transform") << "begin" << end(); -211 for (int t = 0; t < SIZE(Transform); ++t) -212 (*Transform.at(t))(p); -213 trace(99, "transform") << "done" << end(); -214 } -215 -216 //:: load -217 -218 void load(const program& 
p) { -219 trace(99, "load") << "begin" << end(); -220 if (p.segments.empty()) { -221 raise << "no code to run\n" << end(); -222 return; -223 } -224 for (int i = 0; i < SIZE(p.segments); ++i) { -225 const segment& seg = p.segments.at(i); -226 uint32_t addr = seg.start; -227 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); -228 for (int j = 0; j < SIZE(seg.lines); ++j) { -229 const line& l = seg.lines.at(j); -230 for (int k = 0; k < SIZE(l.words); ++k) { -231 const word& w = l.words.at(k); -232 uint8_t val = hex_byte(w.data); -233 if (trace_contains_errors()) return; -234 write_mem_u8(addr, val); -235 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); -236 ++addr; -237 } -238 } -239 if (i == 0) End_of_program = addr; -240 } -241 EIP = p.segments.at(0).start; -242 trace(99, "load") << "done" << end(); -243 } -244 -245 uint8_t hex_byte(const string& s) { -246 istringstream in(s); -247 int result = 0; -248 in >> std::hex >> result; -249 if (!in || !in.eof()) { -250 raise << "token '" << s << "' is not a hex byte\n" << end(); -251 return '\0'; -252 } -253 if (result > 0xff || result < -0x8f) { -254 raise << "token '" << s << "' is not a hex byte\n" << end(); -255 return '\0'; -256 } -257 return static_cast<uint8_t>(result); -258 } -259 -260 :(scenarios parse_and_load) -261 :(scenario number_too_large) -262 % Hide_errors = true; -263 == 0x1 -264 05 cab -265 +error: token 'cab' is not a hex byte -266 -267 :(scenario invalid_hex) -268 % Hide_errors = true; -269 == 0x1 -270 05 cx -271 +error: token 'cx' is not a hex byte +176 flush(out, l); +177 trace(99, "parse") << "done" << end(); +178 } +179 +180 void flush(program& p, vector<line>& lines) { +181 if (lines.empty()) return; +182 if (p.segments.empty()) { +183 raise << "input does not start with a '==' section header\n" << end(); +184 return; +185 } +186 // End flush(p, lines) Special-cases +187 trace(99, "parse") << "flushing to 
segment" << end(); +188 p.segments.back().lines.swap(lines); +189 } +190 +191 void parse_word(const string& data, word& out) { +192 out.original = data; +193 istringstream win(data); +194 if (getline(win, out.data, '/')) { +195 string m; +196 while (getline(win, m, '/')) +197 out.metadata.push_back(m); +198 } +199 } +200 +201 void sanity_check_program_segment(const program& p, uint32_t addr) { +202 for (int i = 0; i < SIZE(p.segments); ++i) { +203 if (p.segments.at(i).start == addr) +204 raise << "can't have multiple segments starting at address 0x" << std::hex << addr << '\n' << end(); +205 } +206 } +207 +208 // helper for tests +209 void parse(const string& text_bytes) { +210 program p; +211 istringstream in(text_bytes); +212 parse(in, p); +213 } +214 +215 :(scenarios parse) +216 :(scenario detect_duplicate_segments) +217 % Hide_errors = true; +218 == 0xee +219 ab +220 == 0xee +221 cd +222 +error: can't have multiple segments starting at address 0xee +223 +224 //:: transform +225 +226 :(before "End Types") +227 typedef void (*transform_fn)(program&); +228 :(before "End Globals") +229 vector<transform_fn> Transform; +230 +231 void transform(program& p) { +232 trace(99, "transform") << "begin" << end(); +233 for (int t = 0; t < SIZE(Transform); ++t) +234 (*Transform.at(t))(p); +235 trace(99, "transform") << "done" << end(); +236 } +237 +238 //:: load +239 +240 void load(const program& p) { +241 trace(99, "load") << "begin" << end(); +242 if (p.segments.empty()) { +243 raise << "no code to run\n" << end(); +244 return; +245 } +246 // Ensure segments are disjoint. 
+247 set<uint32_t> overlap; +248 for (int i = 0; i < SIZE(p.segments); ++i) { +249 const segment& seg = p.segments.at(i); +250 uint32_t addr = seg.start; +251 if (!already_allocated(addr)) +252 Mem.push_back(vma(seg.start)); +253 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); +254 for (int j = 0; j < SIZE(seg.lines); ++j) { +255 const line& l = seg.lines.at(j); +256 for (int k = 0; k < SIZE(l.words); ++k) { +257 const word& w = l.words.at(k); +258 uint8_t val = hex_byte(w.data); +259 if (trace_contains_errors()) return; +260 assert(overlap.find(addr) == overlap.end()); +261 write_mem_u8(addr, val); +262 overlap.insert(addr); +263 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); +264 ++addr; +265 } +266 } +267 if (i == 0) End_of_program = addr; +268 } +269 EIP = p.segments.at(0).start; +270 trace(99, "load") << "done" << end(); +271 } 272 -273 :(scenario negative_number) -274 == 0x1 -275 05 -12 -276 $error: 0 -277 -278 :(scenario negative_number_too_small) -279 % Hide_errors = true; -280 == 0x1 -281 05 -12345 -282 +error: token '-12345' is not a hex byte -283 -284 :(scenario hex_prefix) -285 == 0x1 -286 0x05 -0x12 -287 $error: 0 -288 -289 //: helper for tests -290 :(code) -291 void parse_and_load(const string& text_bytes) { -292 program p; -293 istringstream in(text_bytes); -294 parse(in, p); -295 if (trace_contains_errors()) return; // if any stage raises errors, stop immediately -296 load(p); -297 } -298 -299 //:: run +273 uint8_t hex_byte(const string& s) { +274 istringstream in(s); +275 int result = 0; +276 in >> std::hex >> result; +277 if (!in || !in.eof()) { +278 raise << "token '" << s << "' is not a hex byte\n" << end(); +279 return '\0'; +280 } +281 if (result > 0xff || result < -0x8f) { +282 raise << "token '" << s << "' is not a hex byte\n" << end(); +283 return '\0'; +284 } +285 return static_cast<uint8_t>(result); +286 } +287 +288 :(scenarios parse_and_load) 
+289 :(scenario number_too_large) +290 % Hide_errors = true; +291 == 0x1 +292 05 cab +293 +error: token 'cab' is not a hex byte +294 +295 :(scenario invalid_hex) +296 % Hide_errors = true; +297 == 0x1 +298 05 cx +299 +error: token 'cx' is not a hex byte 300 -301 :(before "End Initialize Op Names(name)") -302 put(name, "05", "add imm32 to R0 (EAX)"); -303 -304 //: our first opcode -305 :(before "End Single-Byte Opcodes") -306 case 0x05: { // add imm32 to EAX -307 int32_t arg2 = next32(); -308 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); -309 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); -310 break; -311 } -312 -313 :(code) -314 // read a 32-bit int in little-endian order from the instruction stream -315 int32_t next32() { -316 int32_t result = next(); -317 result |= (next()<<8); -318 result |= (next()<<16); -319 result |= (next()<<24); -320 return result; -321 } -322 -323 :(code) -324 int32_t parse_int(const string& s) { -325 if (s.empty()) return 0; -326 istringstream in(s); -327 in >> std::hex; -328 if (s.at(0) == '-') { -329 int32_t result = 0; -330 in >> result; -331 if (!in || !in.eof()) { -332 raise << "not a number: " << s << '\n' << end(); -333 return 0; -334 } -335 return result; -336 } -337 uint32_t uresult = 0; -338 in >> uresult; -339 if (!in || !in.eof()) { -340 raise << "not a number: " << s << '\n' << end(); -341 return 0; -342 } -343 return static_cast<int32_t>(uresult); -344 } -345 :(before "End Unit Tests") -346 void test_parse_int() { -347 CHECK_EQ(0, parse_int("0")); -348 CHECK_EQ(0, parse_int("0x0")); -349 CHECK_EQ(0, parse_int("0x0")); -350 CHECK_EQ(16, parse_int("10")); // hex always -351 CHECK_EQ(-1, parse_int("-1")); -352 CHECK_EQ(-1, parse_int("0xffffffff")); -353 } +301 :(scenario negative_number) +302 == 0x1 +303 05 -12 +304 $error: 0 +305 +306 :(scenario negative_number_too_small) +307 % Hide_errors = true; +308 == 0x1 +309 05 -12345 +310 +error: token '-12345' is not a hex byte +311 +312 :(scenario 
hex_prefix) +313 == 0x1 +314 0x05 -0x12 +315 $error: 0 +316 +317 //: helper for tests +318 :(code) +319 void parse_and_load(const string& text_bytes) { +320 program p; +321 istringstream in(text_bytes); +322 parse(in, p); +323 if (trace_contains_errors()) return; // if any stage raises errors, stop immediately +324 load(p); +325 } +326 +327 //:: run +328 +329 :(before "End Initialize Op Names(name)") +330 put(name, "05", "add imm32 to R0 (EAX)"); +331 +332 //: our first opcode +333 :(before "End Single-Byte Opcodes") +334 case 0x05: { // add imm32 to EAX +335 int32_t arg2 = next32(); +336 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); +337 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); +338 break; +339 } +340 +341 :(code) +342 // read a 32-bit int in little-endian order from the instruction stream +343 int32_t next32() { +344 int32_t result = next(); +345 result |= (next()<<8); +346 result |= (next()<<16); +347 result |= (next()<<24); +348 return result; +349 } +350 +351 //:: helpers +352 +353 :(code) +354 string to_string(const word& w) { +355 ostringstream out; +356 out << w.data; +357 for (int i = 0; i < SIZE(w.metadata); ++i) +358 out << " /" << w.metadata.at(i); +359 return out.str(); +360 } +361 +362 int32_t parse_int(const string& s) { +363 if (s.empty()) return 0; +364 istringstream in(s); +365 in >> std::hex; +366 if (s.at(0) == '-') { +367 int32_t result = 0; +368 in >> result; +369 if (!in || !in.eof()) { +370 raise << "not a number: " << s << '\n' << end(); +371 return 0; +372 } +373 return result; +374 } +375 uint32_t uresult = 0; +376 in >> uresult; +377 if (!in || !in.eof()) { +378 raise << "not a number: " << s << '\n' << end(); +379 return 0; +380 } +381 return static_cast<int32_t>(uresult); +382 } +383 :(before "End Unit Tests") +384 void test_parse_int() { +385 CHECK_EQ(0, parse_int("0")); +386 CHECK_EQ(0, parse_int("0x0")); +387 CHECK_EQ(0, parse_int("0x0")); +388 CHECK_EQ(16, parse_int("10")); // hex always +389 
CHECK_EQ(-1, parse_int("-1")); +390 CHECK_EQ(-1, parse_int("0xffffffff")); +391 } diff --git a/html/subx/012elf.cc.html b/html/subx/012elf.cc.html index fafd6df5..919c4689 100644 --- a/html/subx/012elf.cc.html +++ b/html/subx/012elf.cc.html @@ -68,161 +68,169 @@ if ('onhashchange' in window) { 9 assert(argc > 2); 10 reset(); 11 cerr << std::hex; - 12 initialize_mem(); - 13 Mem_offset = CODE_START; - 14 load_elf(argv[2], argc, argv); - 15 while (EIP < End_of_program) // weak final-gasp termination check - 16 run_one_instruction(); - 17 trace(90, "load") << "executed past end of the world: " << EIP << " vs " << End_of_program << end(); - 18 return 0; - 19 } - 20 - 21 :(code) - 22 void load_elf(const string& filename, int argc, char* argv[]) { - 23 int fd = open(filename.c_str(), O_RDONLY); - 24 if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die(); - 25 off_t size = lseek(fd, 0, SEEK_END); - 26 lseek(fd, 0, SEEK_SET); - 27 uint8_t* elf_contents = static_cast<uint8_t*>(malloc(size)); - 28 if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die(); - 29 ssize_t read_size = read(fd, elf_contents, size); - 30 if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die(); - 31 load_elf_contents(elf_contents, size, argc, argv); - 32 free(elf_contents); - 33 } - 34 - 35 void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[]) { - 36 uint8_t magic[5] = {0}; - 37 memcpy(magic, elf_contents, 4); - 38 if (memcmp(magic, "\177ELF", 4) != 0) - 39 raise << "Invalid ELF file; starts with \"" << magic << '"' << die(); - 40 if (elf_contents[4] != 1) - 41 raise << "Only 32-bit ELF files (4-byte words; virtual addresses up to 4GB) supported.\n" << die(); - 42 if (elf_contents[5] != 1) - 43 raise << "Only little-endian ELF files supported.\n" << die(); - 44 // unused: remaining 10 bytes of e_ident - 45 uint32_t e_machine_type = u32_in(&elf_contents[16]); - 46 if 
(e_machine_type != 0x00030002) - 47 raise << "ELF type/machine 0x" << HEXWORD << e_machine_type << " isn't i386 executable\n" << die(); - 48 // unused: e_version. We only support version 1, and later versions will be backwards compatible. - 49 uint32_t e_entry = u32_in(&elf_contents[24]); - 50 uint32_t e_phoff = u32_in(&elf_contents[28]); - 51 // unused: e_shoff - 52 // unused: e_flags - 53 uint32_t e_ehsize = u16_in(&elf_contents[40]); - 54 if (e_ehsize < 52) raise << "Invalid binary; ELF header too small\n" << die(); - 55 uint32_t e_phentsize = u16_in(&elf_contents[42]); - 56 uint32_t e_phnum = u16_in(&elf_contents[44]); - 57 trace(90, "load") << e_phnum << " entries in the program header, each " << e_phentsize << " bytes long" << end(); - 58 // unused: e_shentsize - 59 // unused: e_shnum - 60 // unused: e_shstrndx - 61 - 62 for (size_t i = 0; i < e_phnum; ++i) - 63 load_segment_from_program_header(elf_contents, size, e_phoff + i*e_phentsize, e_ehsize); - 64 - 65 // initialize code and stack - 66 Reg[ESP].u = AFTER_STACK; - 67 Reg[EBP].u = 0; - 68 EIP = e_entry; - 69 - 70 // initialize args on stack - 71 // no envp for now - 72 // we wastefully use a separate page of memory for argv - 73 uint32_t argv_data = ARGV_DATA_SEGMENT; - 74 for (int i = argc-1; i >= /*skip 'subx_bin' and 'run'*/2; --i) { - 75 push(argv_data); - 76 for (size_t j = 0; j <= strlen(argv[i]); ++j) { - 77 write_mem_u8(argv_data, argv[i][j]); - 78 argv_data += sizeof(char); - 79 assert(argv_data < ARGV_DATA_SEGMENT + SEGMENT_SIZE); - 80 } - 81 } - 82 push(argc-/*skip 'subx_bin' and 'run'*/2); - 83 } - 84 - 85 void push(uint32_t val) { - 86 Reg[ESP].u -= 4; - 87 trace(90, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); - 88 trace(90, "run") << "pushing value 0x" << HEXWORD << val << end(); - 89 write_mem_u32(Reg[ESP].u, val); - 90 } - 91 - 92 void load_segment_from_program_header(uint8_t* elf_contents, size_t size, uint32_t offset, uint32_t e_ehsize) { - 93 uint32_t p_type = 
u32_in(&elf_contents[offset]); - 94 trace(90, "load") << "program header at offset " << offset << ": type " << p_type << end(); - 95 if (p_type != 1) { - 96 trace(90, "load") << "ignoring segment at offset " << offset << " of non PT_LOAD type " << p_type << " (see http://refspecs.linuxbase.org/elf/elf.pdf)" << end(); - 97 return; - 98 } - 99 uint32_t p_offset = u32_in(&elf_contents[offset + 4]); -100 uint32_t p_vaddr = u32_in(&elf_contents[offset + 8]); -101 if (e_ehsize > p_vaddr) raise << "Invalid binary; program header overlaps ELF header\n" << die(); -102 // unused: p_paddr -103 uint32_t p_filesz = u32_in(&elf_contents[offset + 16]); -104 uint32_t p_memsz = u32_in(&elf_contents[offset + 20]); -105 if (p_filesz != p_memsz) -106 raise << "Can't handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die(); -107 -108 if (p_offset + p_filesz > size) -109 raise << "Invalid binary; segment at offset " << offset << " is too large: wants to end at " << p_offset+p_filesz << " but the file ends at " << size << '\n' << die(); -110 if (Mem.size() < p_vaddr + p_memsz) -111 Mem.resize(p_vaddr + p_memsz); -112 if (size > p_memsz) size = p_memsz; -113 trace(90, "load") << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ')' << end(); -114 for (size_t i = 0; i < p_filesz; ++i) -115 write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]); -116 if (End_of_program < p_vaddr+p_memsz) -117 End_of_program = p_vaddr+p_memsz; -118 } -119 -120 :(before "End Includes") -121 // Very primitive/fixed/insecure ELF segments for now. 
-122 // code: 0x08048000 -> 0x08048fff -123 // data: 0x08049000 -> 0x08049fff -124 // heap: 0x0804a000 -> 0x0804afff -125 // stack: 0x0804bfff -> 0x0804b000 (downward) -126 const int CODE_START = 0x08048000; -127 const int SEGMENT_SIZE = 0x1000; -128 const int AFTER_STACK = 0x0804c000; -129 const int ARGV_DATA_SEGMENT = 0x0804e000; -130 :(code) -131 void initialize_mem() { -132 Mem.resize(AFTER_STACK - CODE_START); -133 } -134 -135 inline uint32_t u32_in(uint8_t* p) { -136 return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; -137 } -138 -139 inline uint16_t u16_in(uint8_t* p) { -140 return p[0] | p[1] << 8; -141 } -142 -143 :(before "End Types") -144 struct perr {}; -145 :(code) -146 ostream& operator<<(ostream& os, perr /*unused*/) { -147 if (errno) -148 os << ": " << strerror(errno); -149 return os; -150 } -151 -152 :(before "End Types") -153 struct die {}; -154 :(code) -155 ostream& operator<<(ostream& /*unused*/, die /*unused*/) { -156 if (Trace_stream) Trace_stream->newline(); -157 exit(1); + 12 load_elf(argv[2], argc, argv); + 13 while (EIP < End_of_program) // weak final-gasp termination check + 14 run_one_instruction(); + 15 trace(90, "load") << "executed past end of the world: " << EIP << " vs " << End_of_program << end(); + 16 return 0; + 17 } + 18 + 19 :(code) + 20 void load_elf(const string& filename, int argc, char* argv[]) { + 21 int fd = open(filename.c_str(), O_RDONLY); + 22 if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die(); + 23 off_t size = lseek(fd, 0, SEEK_END); + 24 lseek(fd, 0, SEEK_SET); + 25 uint8_t* elf_contents = static_cast<uint8_t*>(malloc(size)); + 26 if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die(); + 27 ssize_t read_size = read(fd, elf_contents, size); + 28 if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die(); + 29 load_elf_contents(elf_contents, size, argc, argv); + 30 free(elf_contents); + 31 } + 32 + 33 void 
load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[]) { + 34 uint8_t magic[5] = {0}; + 35 memcpy(magic, elf_contents, 4); + 36 if (memcmp(magic, "\177ELF", 4) != 0) + 37 raise << "Invalid ELF file; starts with \"" << magic << '"' << die(); + 38 if (elf_contents[4] != 1) + 39 raise << "Only 32-bit ELF files (4-byte words; virtual addresses up to 4GB) supported.\n" << die(); + 40 if (elf_contents[5] != 1) + 41 raise << "Only little-endian ELF files supported.\n" << die(); + 42 // unused: remaining 10 bytes of e_ident + 43 uint32_t e_machine_type = u32_in(&elf_contents[16]); + 44 if (e_machine_type != 0x00030002) + 45 raise << "ELF type/machine 0x" << HEXWORD << e_machine_type << " isn't i386 executable\n" << die(); + 46 // unused: e_version. We only support version 1, and later versions will be backwards compatible. + 47 uint32_t e_entry = u32_in(&elf_contents[24]); + 48 uint32_t e_phoff = u32_in(&elf_contents[28]); + 49 // unused: e_shoff + 50 // unused: e_flags + 51 uint32_t e_ehsize = u16_in(&elf_contents[40]); + 52 if (e_ehsize < 52) raise << "Invalid binary; ELF header too small\n" << die(); + 53 uint32_t e_phentsize = u16_in(&elf_contents[42]); + 54 uint32_t e_phnum = u16_in(&elf_contents[44]); + 55 trace(90, "load") << e_phnum << " entries in the program header, each " << e_phentsize << " bytes long" << end(); + 56 // unused: e_shentsize + 57 // unused: e_shnum + 58 // unused: e_shstrndx + 59 + 60 set<uint32_t> overlap; // to detect overlapping segments + 61 for (size_t i = 0; i < e_phnum; ++i) + 62 load_segment_from_program_header(elf_contents, i, size, e_phoff + i*e_phentsize, e_ehsize, overlap); + 63 + 64 // initialize code and stack + 65 assert(overlap.find(STACK_SEGMENT) == overlap.end()); + 66 Mem.push_back(vma(STACK_SEGMENT)); + 67 assert(overlap.find(AFTER_STACK) == overlap.end()); + 68 Reg[ESP].u = AFTER_STACK; + 69 Reg[EBP].u = 0; + 70 EIP = e_entry; + 71 + 72 // initialize args on stack + 73 // no envp for now + 74 // we 
wastefully use a separate page of memory for argv + 75 Mem.push_back(vma(ARGV_DATA_SEGMENT)); + 76 uint32_t argv_data = ARGV_DATA_SEGMENT; + 77 for (int i = argc-1; i >= /*skip 'subx_bin' and 'run'*/2; --i) { + 78 push(argv_data); + 79 for (size_t j = 0; j <= strlen(argv[i]); ++j) { + 80 assert(overlap.find(argv_data) == overlap.end()); // don't bother comparing ARGV and STACK + 81 write_mem_u8(argv_data, argv[i][j]); + 82 argv_data += sizeof(char); + 83 assert(argv_data < ARGV_DATA_SEGMENT + SEGMENT_SIZE); + 84 } + 85 } + 86 push(argc-/*skip 'subx_bin' and 'run'*/2); + 87 } + 88 + 89 void push(uint32_t val) { + 90 Reg[ESP].u -= 4; + 91 trace(90, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); + 92 trace(90, "run") << "pushing value 0x" << HEXWORD << val << end(); + 93 write_mem_u32(Reg[ESP].u, val); + 94 } + 95 + 96 void load_segment_from_program_header(uint8_t* elf_contents, int segment_index, size_t size, uint32_t offset, uint32_t e_ehsize, set<uint32_t>& overlap) { + 97 uint32_t p_type = u32_in(&elf_contents[offset]); + 98 trace(90, "load") << "program header at offset " << offset << ": type " << p_type << end(); + 99 if (p_type != 1) { +100 trace(90, "load") << "ignoring segment at offset " << offset << " of non PT_LOAD type " << p_type << " (see http://refspecs.linuxbase.org/elf/elf.pdf)" << end(); +101 return; +102 } +103 uint32_t p_offset = u32_in(&elf_contents[offset + 4]); +104 uint32_t p_vaddr = u32_in(&elf_contents[offset + 8]); +105 if (e_ehsize > p_vaddr) raise << "Invalid binary; program header overlaps ELF header\n" << die(); +106 // unused: p_paddr +107 uint32_t p_filesz = u32_in(&elf_contents[offset + 16]); +108 uint32_t p_memsz = u32_in(&elf_contents[offset + 20]); +109 if (p_filesz != p_memsz) +110 raise << "Can't yet handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die(); +111 +112 if (p_offset + p_filesz > size) +113 raise << "Invalid binary; segment at offset " << offset << 
" is too large: wants to end at " << p_offset+p_filesz << " but the file ends at " << size << '\n' << die(); +114 if (p_memsz > INITIAL_SEGMENT_SIZE) { +115 raise << "Code segment too small for SubX; for now please manually increase INITIAL_SEGMENT_SIZE.\n" << end(); +116 return; +117 } +118 trace(90, "load") << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ')' << end(); +119 if (size > p_memsz) size = p_memsz; +120 Mem.push_back(vma(p_vaddr)); +121 for (size_t i = 0; i < p_filesz; ++i) { +122 assert(overlap.find(p_vaddr+i) == overlap.end()); +123 write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]); +124 overlap.insert(p_vaddr+i); +125 } +126 if (segment_index == 0 && End_of_program < p_vaddr+p_memsz) +127 End_of_program = p_vaddr+p_memsz; +128 } +129 +130 :(before "End Includes") +131 // Very primitive/fixed/insecure ELF segments for now. +132 // code: 0x08048000 -> 0x08048fff +133 // data/heap: 0x08050000 -> 0x08050fff +134 // stack: 0x08060fff -> 0x08060000 (downward) +135 const int SEGMENT_SIZE = 0x1000; +136 const int CODE_START = 0x08048000; +137 const int DATA_SEGMENT = 0x08050000; +138 const int HEAP_SEGMENT = DATA_SEGMENT; +139 const int STACK_SEGMENT = 0x08060000; +140 const int AFTER_STACK = 0x08060ffc; // forget final word because of the off-by-one with INITIAL_SEGMENT_SIZE; +141 const int ARGV_DATA_SEGMENT = 0x08070000; +142 :(code) +143 inline uint32_t u32_in(uint8_t* p) { +144 return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; +145 } +146 +147 inline uint16_t u16_in(uint8_t* p) { +148 return p[0] | p[1] << 8; +149 } +150 +151 :(before "End Types") +152 struct perr {}; +153 :(code) +154 ostream& operator<<(ostream& os, perr /*unused*/) { +155 if (errno) +156 os << ": " << strerror(errno); +157 return os; 158 } 159 -160 :(before "End Includes") -161 #include <sys/types.h> -162 #include <sys/stat.h> -163 #include <fcntl.h> -164 #include <stdarg.h> -165 #include 
<errno.h> -166 #include <unistd.h> +160 :(before "End Types") +161 struct die {}; +162 :(code) +163 ostream& operator<<(ostream& /*unused*/, die /*unused*/) { +164 if (Trace_stream) Trace_stream->newline(); +165 exit(1); +166 } +167 +168 :(before "End Includes") +169 #include <sys/types.h> +170 #include <sys/stat.h> +171 #include <fcntl.h> +172 #include <stdarg.h> +173 #include <errno.h> +174 #include <unistd.h> diff --git a/html/subx/013direct_addressing.cc.html b/html/subx/013direct_addressing.cc.html index e4eeb855..173e04e2 100644 --- a/html/subx/013direct_addressing.cc.html +++ b/html/subx/013direct_addressing.cc.html @@ -66,7 +66,7 @@ if ('onhashchange' in window) { 1 //: operating directly on a register 2 3 :(before "End Initialize Op Names(name)") - 4 put(name, "01", "add r32 to rm32"); + 4 put(name, "01", "add r32 to rm32"); 5 6 :(scenario add_r32_to_r32) 7 % Reg[EAX].i = 0x10; @@ -81,9 +81,9 @@ if ('onhashchange' in window) { 16 17 :(before "End Single-Byte Opcodes") 18 case 0x01: { // add r32 to r/m32 - 19 uint8_t modrm = next(); + 19 uint8_t modrm = next(); 20 uint8_t arg2 = (modrm>>3)&0x7; - 21 trace(90, "run") << "add " << rname(arg2) << " to r/m32" << end(); + 21 trace(90, "run") << "add " << rname(arg2) << " to r/m32" << end(); 22 int32_t* arg1 = effective_address(modrm); 23 BINARY_ARITHMETIC_OP(+, *arg1, Reg[arg2].i); 24 break; @@ -99,10 +99,10 @@ if ('onhashchange' in window) { 34 uint8_t rm = modrm & 0x7; 35 if (mod == 3) { 36 // mod 3 is just register direct addressing - 37 trace(90, "run") << "r/m32 is " << rname(rm) << end(); + 37 trace(90, "run") << "r/m32 is " << rname(rm) << end(); 38 return &Reg[rm].i; 39 } - 40 return mem_addr_i32(effective_address_number(modrm)); + 40 return mem_addr_i32(effective_address_number(modrm)); 41 } 42 43 uint32_t effective_address_number(uint8_t modrm) { @@ -113,11 +113,11 @@ if ('onhashchange' in window) { 48 switch (mod) { 49 case 3: 50 // mod 3 is just register direct addressing - 51 raise << "unexpected 
direct addressing mode\n" << end(); + 51 raise << "unexpected direct addressing mode\n" << end(); 52 return 0; 53 // End Mod Special-cases(addr) 54 default: - 55 cerr << "unrecognized mod bits: " << NUM(mod) << '\n'; + 55 cerr << "unrecognized mod bits: " << NUM(mod) << '\n'; 56 exit(1); 57 } 58 //: other mods are indirect, and they'll set addr appropriately @@ -134,14 +134,14 @@ if ('onhashchange' in window) { 69 case 5: return "EBP"; 70 case 6: return "ESI"; 71 case 7: return "EDI"; - 72 default: raise << "invalid register " << r << '\n' << end(); return ""; + 72 default: raise << "invalid register " << r << '\n' << end(); return ""; 73 } 74 } 75 76 //:: subtract 77 78 :(before "End Initialize Op Names(name)") - 79 put(name, "29", "subtract r32 from rm32"); + 79 put(name, "29", "subtract r32 from rm32"); 80 81 :(scenario subtract_r32_from_r32) 82 % Reg[EAX].i = 10; @@ -156,9 +156,9 @@ if ('onhashchange' in window) { 91 92 :(before "End Single-Byte Opcodes") 93 case 0x29: { // subtract r32 from r/m32 - 94 uint8_t modrm = next(); + 94 uint8_t modrm = next(); 95 uint8_t arg2 = (modrm>>3)&0x7; - 96 trace(90, "run") << "subtract " << rname(arg2) << " from r/m32" << end(); + 96 trace(90, "run") << "subtract " << rname(arg2) << " from r/m32" << end(); 97 int32_t* arg1 = effective_address(modrm); 98 BINARY_ARITHMETIC_OP(-, *arg1, Reg[arg2].i); 99 break; @@ -167,7 +167,7 @@ if ('onhashchange' in window) { 102 //:: multiply 103 104 :(before "End Initialize Op Names(name)") -105 put(name, "f7", "test/negate/mul/div rm32 (with EAX if necessary) depending on subop"); +105 put(name, "f7", "test/negate/mul/div rm32 (with EAX if necessary) depending on subop"); 106 107 :(scenario multiply_eax_by_r32) 108 % Reg[EAX].i = 4; @@ -183,23 +183,23 @@ if ('onhashchange' in window) { 118 119 :(before "End Single-Byte Opcodes") 120 case 0xf7: { // xor r32 with r/m32 -121 uint8_t modrm = next(); -122 trace(90, "run") << "operate on r/m32" << end(); +121 uint8_t modrm = next(); +122 
trace(90, "run") << "operate on r/m32" << end(); 123 int32_t* arg1 = effective_address(modrm); 124 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits 125 switch (subop) { 126 case 4: { // mul unsigned EAX by r/m32 -127 trace(90, "run") << "subop: multiply EAX by r/m32" << end(); +127 trace(90, "run") << "subop: multiply EAX by r/m32" << end(); 128 uint64_t result = Reg[EAX].u * static_cast<uint32_t>(*arg1); 129 Reg[EAX].u = result & 0xffffffff; 130 Reg[EDX].u = result >> 32; 131 OF = (Reg[EDX].u != 0); -132 trace(90, "run") << "storing 0x" << HEXWORD << Reg[EAX].u << end(); +132 trace(90, "run") << "storing 0x" << HEXWORD << Reg[EAX].u << end(); 133 break; 134 } 135 // End Op f7 Subops 136 default: -137 cerr << "unrecognized sub-opcode after f7: " << NUM(subop) << '\n'; +137 cerr << "unrecognized sub-opcode after f7: " << NUM(subop) << '\n'; 138 exit(1); 139 } 140 break; @@ -208,7 +208,7 @@ if ('onhashchange' in window) { 143 //: 144 145 :(before "End Initialize Op Names(name)") -146 put(name_0f, "af", "multiply rm32 into r32"); +146 put(name_0f, "af", "multiply rm32 into r32"); 147 148 :(scenario multiply_r32_into_r32) 149 % Reg[EAX].i = 4; @@ -223,9 +223,9 @@ if ('onhashchange' in window) { 158 159 :(before "End Two-Byte Opcodes Starting With 0f") 160 case 0xaf: { // multiply r32 into r/m32 -161 uint8_t modrm = next(); +161 uint8_t modrm = next(); 162 uint8_t arg2 = (modrm>>3)&0x7; -163 trace(90, "run") << "multiply r/m32 into " << rname(arg2) << end(); +163 trace(90, "run") << "multiply r/m32 into " << rname(arg2) << end(); 164 int32_t* arg1 = effective_address(modrm); 165 BINARY_ARITHMETIC_OP(*, Reg[arg2].i, *arg1); 166 break; @@ -234,7 +234,7 @@ if ('onhashchange' in window) { 169 //:: and 170 171 :(before "End Initialize Op Names(name)") -172 put(name, "21", "rm32 = bitwise AND of r32 with rm32"); +172 put(name, "21", "rm32 = bitwise AND of r32 with rm32"); 173 174 :(scenario and_r32_with_r32) 175 % Reg[EAX].i = 0x0a0b0c0d; @@ -249,9 +249,9 @@ if 
('onhashchange' in window) { 184 185 :(before "End Single-Byte Opcodes") 186 case 0x21: { // and r32 with r/m32 -187 uint8_t modrm = next(); +187 uint8_t modrm = next(); 188 uint8_t arg2 = (modrm>>3)&0x7; -189 trace(90, "run") << "and " << rname(arg2) << " with r/m32" << end(); +189 trace(90, "run") << "and " << rname(arg2) << " with r/m32" << end(); 190 int32_t* arg1 = effective_address(modrm); 191 BINARY_BITWISE_OP(&, *arg1, Reg[arg2].u); 192 break; @@ -260,7 +260,7 @@ if ('onhashchange' in window) { 195 //:: or 196 197 :(before "End Initialize Op Names(name)") -198 put(name, "09", "rm32 = bitwise OR of r32 with rm32"); +198 put(name, "09", "rm32 = bitwise OR of r32 with rm32"); 199 200 :(scenario or_r32_with_r32) 201 % Reg[EAX].i = 0x0a0b0c0d; @@ -275,9 +275,9 @@ if ('onhashchange' in window) { 210 211 :(before "End Single-Byte Opcodes") 212 case 0x09: { // or r32 with r/m32 -213 uint8_t modrm = next(); +213 uint8_t modrm = next(); 214 uint8_t arg2 = (modrm>>3)&0x7; -215 trace(90, "run") << "or " << rname(arg2) << " with r/m32" << end(); +215 trace(90, "run") << "or " << rname(arg2) << " with r/m32" << end(); 216 int32_t* arg1 = effective_address(modrm); 217 BINARY_BITWISE_OP(|, *arg1, Reg[arg2].u); 218 break; @@ -286,7 +286,7 @@ if ('onhashchange' in window) { 221 //:: xor 222 223 :(before "End Initialize Op Names(name)") -224 put(name, "31", "rm32 = bitwise XOR of r32 with rm32"); +224 put(name, "31", "rm32 = bitwise XOR of r32 with rm32"); 225 226 :(scenario xor_r32_with_r32) 227 % Reg[EAX].i = 0x0a0b0c0d; @@ -301,9 +301,9 @@ if ('onhashchange' in window) { 236 237 :(before "End Single-Byte Opcodes") 238 case 0x31: { // xor r32 with r/m32 -239 uint8_t modrm = next(); +239 uint8_t modrm = next(); 240 uint8_t arg2 = (modrm>>3)&0x7; -241 trace(90, "run") << "xor " << rname(arg2) << " with r/m32" << end(); +241 trace(90, "run") << "xor " << rname(arg2) << " with r/m32" << end(); 242 int32_t* arg1 = effective_address(modrm); 243 BINARY_BITWISE_OP(^, *arg1, 
Reg[arg2].u); 244 break; @@ -312,7 +312,7 @@ if ('onhashchange' in window) { 247 //:: not 248 249 :(before "End Initialize Op Names(name)") -250 put(name, "f7", "bitwise complement of rm32"); +250 put(name, "f7", "bitwise complement of rm32"); 251 252 :(scenario not_r32) 253 % Reg[EBX].i = 0x0f0f00ff; @@ -327,9 +327,9 @@ if ('onhashchange' in window) { 262 263 :(before "End Op f7 Subops") 264 case 2: { // not r/m32 -265 trace(90, "run") << "subop: not" << end(); +265 trace(90, "run") << "subop: not" << end(); 266 *arg1 = ~(*arg1); -267 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); +267 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); 268 SF = (*arg1 >> 31); 269 ZF = (*arg1 == 0); 270 OF = false; @@ -339,7 +339,7 @@ if ('onhashchange' in window) { 274 //:: compare (cmp) 275 276 :(before "End Initialize Op Names(name)") -277 put(name, "39", "compare: set SF if rm32 < r32"); +277 put(name, "39", "compare: set SF if rm32 < r32"); 278 279 :(scenario compare_r32_with_r32_greater) 280 % Reg[EAX].i = 0x0a0b0c0d; @@ -354,9 +354,9 @@ if ('onhashchange' in window) { 289 290 :(before "End Single-Byte Opcodes") 291 case 0x39: { // set SF if r/m32 < r32 -292 uint8_t modrm = next(); +292 uint8_t modrm = next(); 293 uint8_t reg2 = (modrm>>3)&0x7; -294 trace(90, "run") << "compare " << rname(reg2) << " with r/m32" << end(); +294 trace(90, "run") << "compare " << rname(reg2) << " with r/m32" << end(); 295 int32_t* arg1 = effective_address(modrm); 296 int32_t arg2 = Reg[reg2].i; 297 int32_t tmp1 = *arg1 - arg2; @@ -364,7 +364,7 @@ if ('onhashchange' in window) { 299 ZF = (tmp1 == 0); 300 int64_t tmp2 = *arg1 - arg2; 301 OF = (tmp1 != tmp2); -302 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); +302 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); 303 break; 304 } 305 @@ -393,7 +393,7 @@ if ('onhashchange' in window) { 328 //:: copy (mov) 329 330 :(before "End Initialize Op Names(name)") -331 
put(name, "89", "copy r32 to rm32"); +331 put(name, "89", "copy r32 to rm32"); 332 333 :(scenario copy_r32_to_r32) 334 % Reg[EBX].i = 0xaf; @@ -407,19 +407,19 @@ if ('onhashchange' in window) { 342 343 :(before "End Single-Byte Opcodes") 344 case 0x89: { // copy r32 to r/m32 -345 uint8_t modrm = next(); +345 uint8_t modrm = next(); 346 uint8_t reg2 = (modrm>>3)&0x7; -347 trace(90, "run") << "copy " << rname(reg2) << " to r/m32" << end(); +347 trace(90, "run") << "copy " << rname(reg2) << " to r/m32" << end(); 348 int32_t* arg1 = effective_address(modrm); 349 *arg1 = Reg[reg2].i; -350 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); +350 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); 351 break; 352 } 353 354 //:: xchg 355 356 :(before "End Initialize Op Names(name)") -357 put(name, "87", "swap the contents of r32 and rm32"); +357 put(name, "87", "swap the contents of r32 and rm32"); 358 359 :(scenario xchg_r32_with_r32) 360 % Reg[EBX].i = 0xaf; @@ -435,29 +435,29 @@ if ('onhashchange' in window) { 370 371 :(before "End Single-Byte Opcodes") 372 case 0x87: { // exchange r32 with r/m32 -373 uint8_t modrm = next(); +373 uint8_t modrm = next(); 374 uint8_t reg2 = (modrm>>3)&0x7; -375 trace(90, "run") << "exchange " << rname(reg2) << " with r/m32" << end(); +375 trace(90, "run") << "exchange " << rname(reg2) << " with r/m32" << end(); 376 int32_t* arg1 = effective_address(modrm); 377 int32_t tmp = *arg1; 378 *arg1 = Reg[reg2].i; 379 Reg[reg2].i = tmp; -380 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << " in r/m32" << end(); -381 trace(90, "run") << "storing 0x" << HEXWORD << Reg[reg2].i << " in " << rname(reg2) << end(); +380 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << " in r/m32" << end(); +381 trace(90, "run") << "storing 0x" << HEXWORD << Reg[reg2].i << " in " << rname(reg2) << end(); 382 break; 383 } 384 385 //:: increment 386 387 :(before "End Initialize Op Names(name)") -388 put(name, "40", "increment R0 (EAX)"); 
-389 put(name, "41", "increment R1 (ECX)"); -390 put(name, "42", "increment R2 (EDX)"); -391 put(name, "43", "increment R3 (EBX)"); -392 put(name, "44", "increment R4 (ESP)"); -393 put(name, "45", "increment R5 (EBP)"); -394 put(name, "46", "increment R6 (ESI)"); -395 put(name, "47", "increment R7 (EDI)"); +388 put(name, "40", "increment R0 (EAX)"); +389 put(name, "41", "increment R1 (ECX)"); +390 put(name, "42", "increment R2 (EDX)"); +391 put(name, "43", "increment R3 (EBX)"); +392 put(name, "44", "increment R4 (ESP)"); +393 put(name, "45", "increment R5 (EBP)"); +394 put(name, "46", "increment R6 (ESI)"); +395 put(name, "47", "increment R7 (EDI)"); 396 397 :(scenario increment_r32) 398 % Reg[ECX].u = 0x1f; @@ -477,14 +477,14 @@ if ('onhashchange' in window) { 412 case 0x46: 413 case 0x47: { // increment r32 414 uint8_t reg = op & 0x7; -415 trace(90, "run") << "increment " << rname(reg) << end(); +415 trace(90, "run") << "increment " << rname(reg) << end(); 416 ++Reg[reg].u; -417 trace(90, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); +417 trace(90, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); 418 break; 419 } 420 421 :(before "End Initialize Op Names(name)") -422 put(name, "ff", "inc/dec/jump/push/call rm32 based on subop"); +422 put(name, "ff", "inc/dec/jump/push/call rm32 based on subop"); 423 424 :(scenario increment_rm32) 425 % Reg[EAX].u = 0x20; @@ -498,14 +498,14 @@ if ('onhashchange' in window) { 433 434 :(before "End Single-Byte Opcodes") 435 case 0xff: { -436 uint8_t modrm = next(); +436 uint8_t modrm = next(); 437 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits 438 switch (subop) { 439 case 0: { // increment r/m32 -440 trace(90, "run") << "increment r/m32" << end(); +440 trace(90, "run") << "increment r/m32" << end(); 441 int32_t* arg = effective_address(modrm); 442 ++*arg; -443 trace(90, "run") << "storing value 0x" << HEXWORD << *arg << end(); +443 trace(90, "run") << "storing value 0x" << HEXWORD << 
*arg << end(); 444 break; 445 } 446 // End Op ff Subops @@ -516,14 +516,14 @@ if ('onhashchange' in window) { 451 //:: decrement 452 453 :(before "End Initialize Op Names(name)") -454 put(name, "48", "decrement R0 (EAX)"); -455 put(name, "49", "decrement R1 (ECX)"); -456 put(name, "4a", "decrement R2 (EDX)"); -457 put(name, "4b", "decrement R3 (EBX)"); -458 put(name, "4c", "decrement R4 (ESP)"); -459 put(name, "4d", "decrement R5 (EBP)"); -460 put(name, "4e", "decrement R6 (ESI)"); -461 put(name, "4f", "decrement R7 (EDI)"); +454 put(name, "48", "decrement R0 (EAX)"); +455 put(name, "49", "decrement R1 (ECX)"); +456 put(name, "4a", "decrement R2 (EDX)"); +457 put(name, "4b", "decrement R3 (EBX)"); +458 put(name, "4c", "decrement R4 (ESP)"); +459 put(name, "4d", "decrement R5 (EBP)"); +460 put(name, "4e", "decrement R6 (ESI)"); +461 put(name, "4f", "decrement R7 (EDI)"); 462 463 :(scenario decrement_r32) 464 % Reg[ECX].u = 0x1f; @@ -543,9 +543,9 @@ if ('onhashchange' in window) { 478 case 0x4e: 479 case 0x4f: { // decrement r32 480 uint8_t reg = op & 0x7; -481 trace(90, "run") << "decrement " << rname(reg) << end(); +481 trace(90, "run") << "decrement " << rname(reg) << end(); 482 --Reg[reg].u; -483 trace(90, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); +483 trace(90, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); 484 break; 485 } 486 @@ -561,24 +561,24 @@ if ('onhashchange' in window) { 496 497 :(before "End Op ff Subops") 498 case 1: { // decrement r/m32 -499 trace(90, "run") << "decrement r/m32" << end(); +499 trace(90, "run") << "decrement r/m32" << end(); 500 int32_t* arg = effective_address(modrm); 501 --*arg; -502 trace(90, "run") << "storing value 0x" << HEXWORD << *arg << end(); +502 trace(90, "run") << "storing value 0x" << HEXWORD << *arg << end(); 503 break; 504 } 505 506 //:: push 507 508 :(before "End Initialize Op Names(name)") -509 put(name, "50", "push R0 (EAX) to stack"); -510 put(name, "51", "push R1 (ECX) to 
stack"); -511 put(name, "52", "push R2 (EDX) to stack"); -512 put(name, "53", "push R3 (EBX) to stack"); -513 put(name, "54", "push R4 (ESP) to stack"); -514 put(name, "55", "push R5 (EBP) to stack"); -515 put(name, "56", "push R6 (ESI) to stack"); -516 put(name, "57", "push R7 (EDI) to stack"); +509 put(name, "50", "push R0 (EAX) to stack"); +510 put(name, "51", "push R1 (ECX) to stack"); +511 put(name, "52", "push R2 (EDX) to stack"); +512 put(name, "53", "push R3 (EBX) to stack"); +513 put(name, "54", "push R4 (ESP) to stack"); +514 put(name, "55", "push R5 (EBP) to stack"); +515 put(name, "56", "push R6 (ESI) to stack"); +516 put(name, "57", "push R7 (EDI) to stack"); 517 518 :(scenario push_r32) 519 % Reg[ESP].u = 0x64; @@ -586,7 +586,7 @@ if ('onhashchange' in window) { 521 == 0x1 522 # op ModR/M SIB displacement immediate 523 53 # push EBX to stack -524 +run: push EBX +524 +run: push EBX 525 +run: decrementing ESP to 0x00000060 526 +run: pushing value 0x0000000a 527 @@ -600,60 +600,61 @@ if ('onhashchange' in window) { 535 case 0x56: 536 case 0x57: { // push r32 to stack 537 uint8_t reg = op & 0x7; -538 trace(90, "run") << "push " << rname(reg) << end(); +538 trace(90, "run") << "push " << rname(reg) << end(); 539 //? 
cerr << "push: " << NUM(reg) << ": " << Reg[reg].u << " => " << Reg[ESP].u << '\n'; -540 push(Reg[reg].u); +540 push(Reg[reg].u); 541 break; 542 } 543 544 //:: pop 545 546 :(before "End Initialize Op Names(name)") -547 put(name, "58", "pop top of stack to R0 (EAX)"); -548 put(name, "59", "pop top of stack to R1 (ECX)"); -549 put(name, "5a", "pop top of stack to R2 (EDX)"); -550 put(name, "5b", "pop top of stack to R3 (EBX)"); -551 put(name, "5c", "pop top of stack to R4 (ESP)"); -552 put(name, "5d", "pop top of stack to R5 (EBP)"); -553 put(name, "5e", "pop top of stack to R6 (ESI)"); -554 put(name, "5f", "pop top of stack to R7 (EDI)"); +547 put(name, "58", "pop top of stack to R0 (EAX)"); +548 put(name, "59", "pop top of stack to R1 (ECX)"); +549 put(name, "5a", "pop top of stack to R2 (EDX)"); +550 put(name, "5b", "pop top of stack to R3 (EBX)"); +551 put(name, "5c", "pop top of stack to R4 (ESP)"); +552 put(name, "5d", "pop top of stack to R5 (EBP)"); +553 put(name, "5e", "pop top of stack to R6 (ESI)"); +554 put(name, "5f", "pop top of stack to R7 (EDI)"); 555 556 :(scenario pop_r32) -557 % Reg[ESP].u = 0x60; -558 % write_mem_i32(0x60, 0x0000000a); -559 == 0x1 # code segment -560 # op ModR/M SIB displacement immediate -561 5b # pop stack to EBX -562 == 0x60 # data segment -563 0a 00 00 00 # 0x0a -564 +run: pop into EBX -565 +run: popping value 0x0000000a -566 +run: incrementing ESP to 0x00000064 -567 -568 :(before "End Single-Byte Opcodes") -569 case 0x58: -570 case 0x59: -571 case 0x5a: -572 case 0x5b: -573 case 0x5c: -574 case 0x5d: -575 case 0x5e: -576 case 0x5f: { // pop stack into r32 -577 uint8_t reg = op & 0x7; -578 trace(90, "run") << "pop into " << rname(reg) << end(); -579 //? cerr << "pop from " << Reg[ESP].u << '\n'; -580 Reg[reg].u = pop(); -581 //? 
cerr << "=> " << NUM(reg) << ": " << Reg[reg].u << '\n'; -582 break; -583 } -584 :(code) -585 uint32_t pop() { -586 uint32_t result = read_mem_u32(Reg[ESP].u); -587 trace(90, "run") << "popping value 0x" << HEXWORD << result << end(); -588 Reg[ESP].u += 4; -589 trace(90, "run") << "incrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); -590 return result; -591 } +557 % Reg[ESP].u = 0x2000; +558 % Mem.push_back(vma(0x2000)); // manually allocate memory +559 % write_mem_i32(0x2000, 0x0000000a); // ..before this write +560 == 0x1 # code segment +561 # op ModR/M SIB displacement immediate +562 5b # pop stack to EBX +563 == 0x2000 # data segment +564 0a 00 00 00 # 0x0a +565 +run: pop into EBX +566 +run: popping value 0x0000000a +567 +run: incrementing ESP to 0x00002004 +568 +569 :(before "End Single-Byte Opcodes") +570 case 0x58: +571 case 0x59: +572 case 0x5a: +573 case 0x5b: +574 case 0x5c: +575 case 0x5d: +576 case 0x5e: +577 case 0x5f: { // pop stack into r32 +578 uint8_t reg = op & 0x7; +579 trace(90, "run") << "pop into " << rname(reg) << end(); +580 //? cerr << "pop from " << Reg[ESP].u << '\n'; +581 Reg[reg].u = pop(); +582 //? 
cerr << "=> " << NUM(reg) << ": " << Reg[reg].u << '\n'; +583 break; +584 } +585 :(code) +586 uint32_t pop() { +587 uint32_t result = read_mem_u32(Reg[ESP].u); +588 trace(90, "run") << "popping value 0x" << HEXWORD << result << end(); +589 Reg[ESP].u += 4; +590 trace(90, "run") << "incrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); +591 return result; +592 } diff --git a/html/subx/014indirect_addressing.cc.html b/html/subx/014indirect_addressing.cc.html index 157701b7..61ce4216 100644 --- a/html/subx/014indirect_addressing.cc.html +++ b/html/subx/014indirect_addressing.cc.html @@ -67,22 +67,22 @@ if ('onhashchange' in window) { 3 4 :(scenario add_r32_to_mem_at_r32) 5 % Reg[EBX].i = 0x10; - 6 % Reg[EAX].i = 0x60; + 6 % Reg[EAX].i = 0x2000; 7 == 0x1 # code segment 8 # op ModR/M SIB displacement immediate 9 01 18 # add EBX to *EAX 10 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) - 11 == 0x60 # data segment + 11 == 0x2000 # data segment 12 01 00 00 00 # 1 13 +run: add EBX to r/m32 - 14 +run: effective address is 0x60 (EAX) + 14 +run: effective address is 0x2000 (EAX) 15 +run: storing 0x00000011 16 17 :(before "End Mod Special-cases(addr)") 18 case 0: // indirect addressing 19 switch (rm) { 20 default: // address in register - 21 trace(90, "run") << "effective address is 0x" << std::hex << Reg[rm].u << " (" << rname(rm) << ")" << end(); + 21 trace(90, "run") << "effective address is 0x" << std::hex << Reg[rm].u << " (" << rname(rm) << ")" << end(); 22 addr = Reg[rm].u; 23 break; 24 // End Mod 0 Special-cases(addr) @@ -92,26 +92,26 @@ if ('onhashchange' in window) { 28 //: 29 30 :(before "End Initialize Op Names(name)") - 31 put(name, "03", "add rm32 to r32"); + 31 put(name, "03", "add rm32 to r32"); 32 33 :(scenario add_mem_at_r32_to_r32) - 34 % Reg[EAX].i = 0x60; + 34 % Reg[EAX].i = 0x2000; 35 % Reg[EBX].i = 0x10; 36 == 0x1 # code segment 37 # op ModR/M SIB displacement immediate 38 03 18 # add *EAX to EBX 39 # ModR/M in binary: 00 
(indirect mode) 011 (src EAX) 000 (dest EAX) - 40 == 0x60 # data segment + 40 == 0x2000 # data segment 41 01 00 00 00 # 1 42 +run: add r/m32 to EBX - 43 +run: effective address is 0x60 (EAX) + 43 +run: effective address is 0x2000 (EAX) 44 +run: storing 0x00000011 45 46 :(before "End Single-Byte Opcodes") 47 case 0x03: { // add r/m32 to r32 - 48 uint8_t modrm = next(); + 48 uint8_t modrm = next(); 49 uint8_t arg1 = (modrm>>3)&0x7; - 50 trace(90, "run") << "add r/m32 to " << rname(arg1) << end(); + 50 trace(90, "run") << "add r/m32 to " << rname(arg1) << end(); 51 const int32_t* arg2 = effective_address(modrm); 52 BINARY_ARITHMETIC_OP(+, Reg[arg1].i, *arg2); 53 break; @@ -120,41 +120,41 @@ if ('onhashchange' in window) { 56 //:: subtract 57 58 :(scenario subtract_r32_from_mem_at_r32) - 59 % Reg[EAX].i = 0x60; + 59 % Reg[EAX].i = 0x2000; 60 % Reg[EBX].i = 1; 61 == 0x1 # code segment 62 # op ModR/M SIB displacement immediate 63 29 18 # subtract EBX from *EAX 64 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) - 65 == 0x60 # data segment + 65 == 0x2000 # data segment 66 0a 00 00 00 # 10 67 +run: subtract EBX from r/m32 - 68 +run: effective address is 0x60 (EAX) + 68 +run: effective address is 0x2000 (EAX) 69 +run: storing 0x00000009 70 71 //: 72 73 :(before "End Initialize Op Names(name)") - 74 put(name, "2b", "subtract rm32 from r32"); + 74 put(name, "2b", "subtract rm32 from r32"); 75 76 :(scenario subtract_mem_at_r32_from_r32) - 77 % Reg[EAX].i = 0x60; + 77 % Reg[EAX].i = 0x2000; 78 % Reg[EBX].i = 10; 79 == 0x1 # code segment 80 # op ModR/M SIB displacement immediate 81 2b 18 # subtract *EAX from EBX 82 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) - 83 == 0x60 # data segment + 83 == 0x2000 # data segment 84 01 00 00 00 # 1 85 +run: subtract r/m32 from EBX - 86 +run: effective address is 0x60 (EAX) + 86 +run: effective address is 0x2000 (EAX) 87 +run: storing 0x00000009 88 89 :(before "End Single-Byte Opcodes") 90 case 0x2b: { 
// subtract r/m32 from r32 - 91 uint8_t modrm = next(); + 91 uint8_t modrm = next(); 92 uint8_t arg1 = (modrm>>3)&0x7; - 93 trace(90, "run") << "subtract r/m32 from " << rname(arg1) << end(); + 93 trace(90, "run") << "subtract r/m32 from " << rname(arg1) << end(); 94 const int32_t* arg2 = effective_address(modrm); 95 BINARY_ARITHMETIC_OP(-, Reg[arg1].i, *arg2); 96 break; @@ -163,41 +163,41 @@ if ('onhashchange' in window) { 99 //:: and 100 101 :(scenario and_r32_with_mem_at_r32) -102 % Reg[EAX].i = 0x60; +102 % Reg[EAX].i = 0x2000; 103 % Reg[EBX].i = 0xff; 104 == 0x1 # code segment 105 # op ModR/M SIB displacement immediate 106 21 18 # and EBX with *EAX 107 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -108 == 0x60 # data segment +108 == 0x2000 # data segment 109 0d 0c 0b 0a # 0x0a0b0c0d 110 +run: and EBX with r/m32 -111 +run: effective address is 0x60 (EAX) +111 +run: effective address is 0x2000 (EAX) 112 +run: storing 0x0000000d 113 114 //: 115 116 :(before "End Initialize Op Names(name)") -117 put(name, "23", "r32 = bitwise AND of r32 with rm32"); +117 put(name, "23", "r32 = bitwise AND of r32 with rm32"); 118 119 :(scenario and_mem_at_r32_with_r32) -120 % Reg[EAX].i = 0x60; +120 % Reg[EAX].i = 0x2000; 121 % Reg[EBX].i = 0x0a0b0c0d; 122 == 0x1 # code segment 123 # op ModR/M SIB displacement immediate 124 23 18 # and *EAX with EBX 125 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -126 == 0x60 # data segment +126 == 0x2000 # data segment 127 ff 00 00 00 # 0xff 128 +run: and r/m32 with EBX -129 +run: effective address is 0x60 (EAX) +129 +run: effective address is 0x2000 (EAX) 130 +run: storing 0x0000000d 131 132 :(before "End Single-Byte Opcodes") 133 case 0x23: { // and r/m32 with r32 -134 uint8_t modrm = next(); +134 uint8_t modrm = next(); 135 uint8_t arg1 = (modrm>>3)&0x7; -136 trace(90, "run") << "and r/m32 with " << rname(arg1) << end(); +136 trace(90, "run") << "and r/m32 with " << rname(arg1) << end(); 137 const 
int32_t* arg2 = effective_address(modrm); 138 BINARY_BITWISE_OP(&, Reg[arg1].u, *arg2); 139 break; @@ -206,41 +206,41 @@ if ('onhashchange' in window) { 142 //:: or 143 144 :(scenario or_r32_with_mem_at_r32) -145 % Reg[EAX].i = 0x60; +145 % Reg[EAX].i = 0x2000; 146 % Reg[EBX].i = 0xa0b0c0d0; 147 == 0x1 # code segment 148 # op ModR/M SIB displacement immediate 149 09 18 # or EBX with *EAX 150 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -151 == 0x60 # data segment +151 == 0x2000 # data segment 152 0d 0c 0b 0a # 0x0a0b0c0d 153 +run: or EBX with r/m32 -154 +run: effective address is 0x60 (EAX) +154 +run: effective address is 0x2000 (EAX) 155 +run: storing 0xaabbccdd 156 157 //: 158 159 :(before "End Initialize Op Names(name)") -160 put(name, "0b", "r32 = bitwise OR of r32 with rm32"); +160 put(name, "0b", "r32 = bitwise OR of r32 with rm32"); 161 162 :(scenario or_mem_at_r32_with_r32) -163 % Reg[EAX].i = 0x60; +163 % Reg[EAX].i = 0x2000; 164 % Reg[EBX].i = 0xa0b0c0d0; 165 == 0x1 # code segment 166 # op ModR/M SIB displacement immediate 167 0b 18 # or *EAX with EBX 168 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -169 == 0x60 # data segment +169 == 0x2000 # data segment 170 0d 0c 0b 0a # 0x0a0b0c0d 171 +run: or r/m32 with EBX -172 +run: effective address is 0x60 (EAX) +172 +run: effective address is 0x2000 (EAX) 173 +run: storing 0xaabbccdd 174 175 :(before "End Single-Byte Opcodes") 176 case 0x0b: { // or r/m32 with r32 -177 uint8_t modrm = next(); +177 uint8_t modrm = next(); 178 uint8_t arg1 = (modrm>>3)&0x7; -179 trace(90, "run") << "or r/m32 with " << rname(arg1) << end(); +179 trace(90, "run") << "or r/m32 with " << rname(arg1) << end(); 180 const int32_t* arg2 = effective_address(modrm); 181 BINARY_BITWISE_OP(|, Reg[arg1].u, *arg2); 182 break; @@ -249,41 +249,41 @@ if ('onhashchange' in window) { 185 //:: xor 186 187 :(scenario xor_r32_with_mem_at_r32) -188 % Reg[EAX].i = 0x60; +188 % Reg[EAX].i = 0x2000; 189 % 
Reg[EBX].i = 0xa0b0c0d0; 190 == 0x1 # code segment 191 # op ModR/M SIB displacement immediate 192 31 18 # xor EBX with *EAX 193 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -194 == 0x60 # data segment +194 == 0x2000 # data segment 195 0d 0c bb aa # 0xaabb0c0d 196 +run: xor EBX with r/m32 -197 +run: effective address is 0x60 (EAX) +197 +run: effective address is 0x2000 (EAX) 198 +run: storing 0x0a0bccdd 199 200 //: 201 202 :(before "End Initialize Op Names(name)") -203 put(name, "33", "r32 = bitwise XOR of r32 with rm32"); +203 put(name, "33", "r32 = bitwise XOR of r32 with rm32"); 204 205 :(scenario xor_mem_at_r32_with_r32) -206 % Reg[EAX].i = 0x60; +206 % Reg[EAX].i = 0x2000; 207 % Reg[EBX].i = 0xa0b0c0d0; 208 == 0x1 # code segment 209 # op ModR/M SIB displacement immediate 210 33 18 # xor *EAX with EBX 211 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -212 == 0x60 # data segment +212 == 0x2000 # data segment 213 0d 0c 0b 0a # 0x0a0b0c0d 214 +run: xor r/m32 with EBX -215 +run: effective address is 0x60 (EAX) +215 +run: effective address is 0x2000 (EAX) 216 +run: storing 0xaabbccdd 217 218 :(before "End Single-Byte Opcodes") 219 case 0x33: { // xor r/m32 with r32 -220 uint8_t modrm = next(); +220 uint8_t modrm = next(); 221 uint8_t arg1 = (modrm>>3)&0x7; -222 trace(90, "run") << "xor r/m32 with " << rname(arg1) << end(); +222 trace(90, "run") << "xor r/m32 with " << rname(arg1) << end(); 223 const int32_t* arg2 = effective_address(modrm); 224 BINARY_BITWISE_OP(|, Reg[arg1].u, *arg2); 225 break; @@ -292,82 +292,82 @@ if ('onhashchange' in window) { 228 //:: not 229 230 :(scenario not_of_mem_at_r32) -231 % Reg[EBX].i = 0x60; +231 % Reg[EBX].i = 0x2000; 232 == 0x1 # code segment 233 # op ModR/M SIB displacement immediate 234 f7 13 # negate *EBX 235 # ModR/M in binary: 00 (indirect mode) 010 (subop not) 011 (dest EBX) -236 == 0x60 # data segment +236 == 0x2000 # data segment 237 ff 00 0f 0f # 0x0f0f00ff 238 +run: operate on 
r/m32 -239 +run: effective address is 0x60 (EBX) +239 +run: effective address is 0x2000 (EBX) 240 +run: subop: not 241 +run: storing 0xf0f0ff00 242 243 //:: compare (cmp) 244 245 :(scenario compare_mem_at_r32_with_r32_greater) -246 % Reg[EAX].i = 0x60; +246 % Reg[EAX].i = 0x2000; 247 % Reg[EBX].i = 0x0a0b0c07; 248 == 0x1 # code segment 249 # op ModR/M SIB displacement immediate 250 39 18 # compare EBX with *EAX 251 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -252 == 0x60 # data segment +252 == 0x2000 # data segment 253 0d 0c 0b 0a # 0x0a0b0c0d 254 +run: compare EBX with r/m32 -255 +run: effective address is 0x60 (EAX) +255 +run: effective address is 0x2000 (EAX) 256 +run: SF=0; ZF=0; OF=0 257 258 :(scenario compare_mem_at_r32_with_r32_lesser) -259 % Reg[EAX].i = 0x60; +259 % Reg[EAX].i = 0x2000; 260 % Reg[EBX].i = 0x0a0b0c0d; 261 == 0x1 # code segment 262 # op ModR/M SIB displacement immediate 263 39 18 # compare EBX with *EAX 264 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -265 == 0x60 # data segment +265 == 0x2000 # data segment 266 07 0c 0b 0a # 0x0a0b0c0d 267 +run: compare EBX with r/m32 -268 +run: effective address is 0x60 (EAX) +268 +run: effective address is 0x2000 (EAX) 269 +run: SF=1; ZF=0; OF=0 270 271 :(scenario compare_mem_at_r32_with_r32_equal) -272 % Reg[EAX].i = 0x60; +272 % Reg[EAX].i = 0x2000; 273 % Reg[EBX].i = 0x0a0b0c0d; 274 == 0x1 # code segment 275 # op ModR/M SIB displacement immediate 276 39 18 # compare EBX with *EAX 277 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -278 == 0x60 # data segment +278 == 0x2000 # data segment 279 0d 0c 0b 0a # 0x0a0b0c0d 280 +run: compare EBX with r/m32 -281 +run: effective address is 0x60 (EAX) +281 +run: effective address is 0x2000 (EAX) 282 +run: SF=0; ZF=1; OF=0 283 284 //: 285 286 :(before "End Initialize Op Names(name)") -287 put(name, "3b", "compare: set SF if r32 < rm32"); +287 put(name, "3b", "compare: set SF if r32 < rm32"); 288 
289 :(scenario compare_r32_with_mem_at_r32_greater) -290 % Reg[EAX].i = 0x60; +290 % Reg[EAX].i = 0x2000; 291 % Reg[EBX].i = 0x0a0b0c0d; 292 == 0x1 # code segment 293 # op ModR/M SIB displacement immediate 294 3b 18 # compare *EAX with EBX 295 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -296 == 0x60 # data segment +296 == 0x2000 # data segment 297 07 0c 0b 0a # 0x0a0b0c0d 298 +run: compare r/m32 with EBX -299 +run: effective address is 0x60 (EAX) +299 +run: effective address is 0x2000 (EAX) 300 +run: SF=0; ZF=0; OF=0 301 302 :(before "End Single-Byte Opcodes") 303 case 0x3b: { // set SF if r32 < r/m32 -304 uint8_t modrm = next(); +304 uint8_t modrm = next(); 305 uint8_t reg1 = (modrm>>3)&0x7; -306 trace(90, "run") << "compare r/m32 with " << rname(reg1) << end(); +306 trace(90, "run") << "compare r/m32 with " << rname(reg1) << end(); 307 int32_t arg1 = Reg[reg1].i; 308 int32_t* arg2 = effective_address(modrm); 309 int32_t tmp1 = arg1 - *arg2; @@ -375,34 +375,34 @@ if ('onhashchange' in window) { 311 ZF = (tmp1 == 0); 312 int64_t tmp2 = arg1 - *arg2; 313 OF = (tmp1 != tmp2); -314 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); +314 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); 315 break; 316 } 317 318 :(scenario compare_r32_with_mem_at_r32_lesser) -319 % Reg[EAX].i = 0x60; +319 % Reg[EAX].i = 0x2000; 320 % Reg[EBX].i = 0x0a0b0c07; 321 == 0x1 # code segment 322 # op ModR/M SIB displacement immediate 323 3b 18 # compare *EAX with EBX 324 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -325 == 0x60 # data segment +325 == 0x2000 # data segment 326 0d 0c 0b 0a # 0x0a0b0c0d 327 +run: compare r/m32 with EBX -328 +run: effective address is 0x60 (EAX) +328 +run: effective address is 0x2000 (EAX) 329 +run: SF=1; ZF=0; OF=0 330 331 :(scenario compare_r32_with_mem_at_r32_equal) -332 % Reg[EAX].i = 0x60; +332 % Reg[EAX].i = 0x2000; 333 % Reg[EBX].i = 0x0a0b0c0d; 334 == 0x1 # 
code segment 335 # op ModR/M SIB displacement immediate 336 3b 18 # compare *EAX with EBX 337 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -338 == 0x60 # data segment +338 == 0x2000 # data segment 339 0d 0c 0b 0a # 0x0a0b0c0d 340 +run: compare r/m32 with EBX -341 +run: effective address is 0x60 (EAX) +341 +run: effective address is 0x2000 (EAX) 342 +run: SF=0; ZF=1; OF=0 343 344 //:: copy (mov) @@ -421,175 +421,175 @@ if ('onhashchange' in window) { 357 //: 358 359 :(before "End Initialize Op Names(name)") -360 put(name, "8b", "copy rm32 to r32"); +360 put(name, "8b", "copy rm32 to r32"); 361 362 :(scenario copy_mem_at_r32_to_r32) -363 % Reg[EAX].i = 0x60; +363 % Reg[EAX].i = 0x2000; 364 == 0x1 # code segment 365 # op ModR/M SIB displacement immediate 366 8b 18 # copy *EAX to EBX 367 # ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX) -368 == 0x60 # data segment +368 == 0x2000 # data segment 369 af 00 00 00 # 0xaf 370 +run: copy r/m32 to EBX -371 +run: effective address is 0x60 (EAX) +371 +run: effective address is 0x2000 (EAX) 372 +run: storing 0x000000af 373 374 :(before "End Single-Byte Opcodes") 375 case 0x8b: { // copy r32 to r/m32 -376 uint8_t modrm = next(); +376 uint8_t modrm = next(); 377 uint8_t reg1 = (modrm>>3)&0x7; -378 trace(90, "run") << "copy r/m32 to " << rname(reg1) << end(); +378 trace(90, "run") << "copy r/m32 to " << rname(reg1) << end(); 379 int32_t* arg2 = effective_address(modrm); 380 Reg[reg1].i = *arg2; -381 trace(90, "run") << "storing 0x" << HEXWORD << *arg2 << end(); +381 trace(90, "run") << "storing 0x" << HEXWORD << *arg2 << end(); 382 break; 383 } 384 385 //: 386 387 :(before "End Initialize Op Names(name)") -388 put(name, "88", "copy r8 (lowermost byte of r32) to r8/m8-at-r32"); +388 put(name, "88", "copy r8 (lowermost byte of r32) to r8/m8-at-r32"); 389 390 :(scenario copy_r8_to_mem_at_r32) 391 % Reg[EBX].i = 0x224488ab; -392 % Reg[EAX].i = 0x60; +392 % Reg[EAX].i = 0x2000; 393 == 0x1 394 # op 
ModR/M SIB displacement immediate 395 88 18 # copy just the lowermost byte of EBX to the byte at *EAX 396 # ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX) -397 == 0x60 +397 == 0x2000 398 f0 cc bb aa # 0xf0 with more data in following bytes 399 +run: copy lowermost byte of EBX to r8/m8-at-r32 -400 +run: effective address is 0x60 (EAX) +400 +run: effective address is 0x2000 (EAX) 401 +run: storing 0xab -402 % CHECK_EQ(0xaabbccab, read_mem_u32(0x60)); +402 % CHECK_EQ(0xaabbccab, read_mem_u32(0x2000)); 403 404 :(before "End Single-Byte Opcodes") 405 case 0x88: { // copy r/m8 to r8 -406 uint8_t modrm = next(); +406 uint8_t modrm = next(); 407 uint8_t reg2 = (modrm>>3)&0x7; -408 trace(90, "run") << "copy lowermost byte of " << rname(reg2) << " to r8/m8-at-r32" << end(); +408 trace(90, "run") << "copy lowermost byte of " << rname(reg2) << " to r8/m8-at-r32" << end(); 409 // use unsigned to zero-extend 8-bit value to 32 bits 410 uint8_t* arg1 = reinterpret_cast<uint8_t*>(effective_address(modrm)); 411 *arg1 = Reg[reg2].u; -412 trace(90, "run") << "storing 0x" << HEXBYTE << NUM(*arg1) << end(); +412 trace(90, "run") << "storing 0x" << HEXBYTE << NUM(*arg1) << end(); 413 break; 414 } 415 416 //: 417 418 :(before "End Initialize Op Names(name)") -419 put(name, "8a", "copy r8/m8-at-r32 to r8 (lowermost byte of r32)"); +419 put(name, "8a", "copy r8/m8-at-r32 to r8 (lowermost byte of r32)"); 420 421 :(scenario copy_mem_at_r32_to_r8) 422 % Reg[EBX].i = 0xaabbcc0f; // one nibble each of lowest byte set to all 0s and all 1s, to maximize value of this test -423 % Reg[EAX].i = 0x60; +423 % Reg[EAX].i = 0x2000; 424 == 0x1 425 # op ModR/M SIB displacement immediate 426 8a 18 # copy just the byte at *EAX to lowermost byte of EBX (clearing remaining bytes) 427 # ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX) -428 == 0x60 # data segment +428 == 0x2000 # data segment 429 ab ff ff ff # 0xab with more data in following bytes 430 +run: copy r8/m8-at-r32 
to lowermost byte of EBX -431 +run: effective address is 0x60 (EAX) +431 +run: effective address is 0x2000 (EAX) 432 +run: storing 0xab 433 # remaining bytes of EBX are *not* cleared 434 +run: EBX now contains 0xaabbccab 435 436 :(before "End Single-Byte Opcodes") 437 case 0x8a: { // copy r/m8 to r8 -438 uint8_t modrm = next(); +438 uint8_t modrm = next(); 439 uint8_t reg1 = (modrm>>3)&0x7; -440 trace(90, "run") << "copy r8/m8-at-r32 to lowermost byte of " << rname(reg1) << end(); +440 trace(90, "run") << "copy r8/m8-at-r32 to lowermost byte of " << rname(reg1) << end(); 441 // use unsigned to zero-extend 8-bit value to 32 bits 442 uint8_t* arg2 = reinterpret_cast<uint8_t*>(effective_address(modrm)); -443 trace(90, "run") << "storing 0x" << HEXBYTE << NUM(*arg2) << end(); +443 trace(90, "run") << "storing 0x" << HEXBYTE << NUM(*arg2) << end(); 444 *reinterpret_cast<uint8_t*>(&Reg[reg1].u) = *arg2; // assumes host is little-endian -445 trace(90, "run") << rname(reg1) << " now contains 0x" << HEXWORD << Reg[reg1].u << end(); +445 trace(90, "run") << rname(reg1) << " now contains 0x" << HEXWORD << Reg[reg1].u << end(); 446 break; 447 } 448 449 //:: jump 450 451 :(scenario jump_mem_at_r32) -452 % Reg[EAX].i = 0x60; +452 % Reg[EAX].i = 0x2000; 453 == 0x1 # code segment 454 # op ModR/M SIB displacement immediate 455 ff 20 # jump to *EAX 456 # ModR/M in binary: 00 (indirect mode) 100 (jump to r/m32) 000 (src EAX) 457 05 00 00 00 01 458 05 00 00 00 02 -459 == 0x60 # data segment +459 == 0x2000 # data segment 460 08 00 00 00 # 8 461 +run: inst: 0x00000001 462 +run: jump to r/m32 -463 +run: effective address is 0x60 (EAX) +463 +run: effective address is 0x2000 (EAX) 464 +run: jumping to 0x00000008 465 +run: inst: 0x00000008 466 -run: inst: 0x00000003 467 468 :(before "End Op ff Subops") 469 case 4: { // jump to r/m32 -470 trace(90, "run") << "jump to r/m32" << end(); +470 trace(90, "run") << "jump to r/m32" << end(); 471 int32_t* arg2 = effective_address(modrm); 472 EIP = 
*arg2; -473 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); +473 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); 474 break; 475 } 476 477 //:: push 478 479 :(scenario push_mem_at_r32) -480 % Reg[EAX].i = 0x60; +480 % Reg[EAX].i = 0x2000; 481 % Reg[ESP].u = 0x14; 482 == 0x1 # code segment 483 # op ModR/M SIB displacement immediate 484 ff 30 # push *EAX to stack 485 # ModR/M in binary: 00 (indirect mode) 110 (push r/m32) 000 (src EAX) -486 == 0x60 # data segment +486 == 0x2000 # data segment 487 af 00 00 00 # 0xaf -488 +run: push r/m32 -489 +run: effective address is 0x60 (EAX) +488 +run: push r/m32 +489 +run: effective address is 0x2000 (EAX) 490 +run: decrementing ESP to 0x00000010 491 +run: pushing value 0x000000af 492 493 :(before "End Op ff Subops") 494 case 6: { // push r/m32 to stack -495 trace(90, "run") << "push r/m32" << end(); +495 trace(90, "run") << "push r/m32" << end(); 496 const int32_t* val = effective_address(modrm); -497 push(*val); +497 push(*val); 498 break; 499 } 500 501 //:: pop 502 503 :(before "End Initialize Op Names(name)") -504 put(name, "8f", "pop top of stack to rm32"); +504 put(name, "8f", "pop top of stack to rm32"); 505 506 :(scenario pop_mem_at_r32) 507 % Reg[EAX].i = 0x60; -508 % Reg[ESP].u = 0x10; +508 % Reg[ESP].u = 0x2000; 509 == 0x1 # code segment 510 # op ModR/M SIB displacement immediate 511 8f 00 # pop stack into *EAX 512 # ModR/M in binary: 00 (indirect mode) 000 (pop r/m32) 000 (dest EAX) -513 == 0x10 # data segment +513 == 0x2000 # data segment 514 30 00 00 00 # 0x30 -515 +run: pop into r/m32 +515 +run: pop into r/m32 516 +run: effective address is 0x60 (EAX) 517 +run: popping value 0x00000030 -518 +run: incrementing ESP to 0x00000014 +518 +run: incrementing ESP to 0x00002004 519 520 :(before "End Single-Byte Opcodes") 521 case 0x8f: { // pop stack into r/m32 -522 uint8_t modrm = next(); +522 uint8_t modrm = next(); 523 uint8_t subop = (modrm>>3)&0x7; 524 switch (subop) { 525 case 0: { -526 
trace(90, "run") << "pop into r/m32" << end(); +526 trace(90, "run") << "pop into r/m32" << end(); 527 int32_t* dest = effective_address(modrm); -528 *dest = pop(); +528 *dest = pop(); 529 break; 530 } 531 } @@ -602,34 +602,34 @@ if ('onhashchange' in window) { 538 % Reg[EBX].i = 0x10; // source 539 == 0x1 # code segment 540 # op ModR/M SIB displacement immediate -541 01 1d 60 00 00 00 # add EBX to *0x60 +541 01 1d 00 20 00 00 # add EBX to *0x2000 542 # ModR/M in binary: 00 (indirect mode) 011 (src EBX) 101 (dest in disp32) -543 == 0x60 # data segment +543 == 0x2000 # data segment 544 01 00 00 00 # 1 545 +run: add EBX to r/m32 -546 +run: effective address is 0x60 (disp32) +546 +run: effective address is 0x2000 (disp32) 547 +run: storing 0x00000011 548 549 :(before "End Mod 0 Special-cases(addr)") 550 case 5: // exception: mod 0b00 rm 0b101 => incoming disp32 -551 addr = next32(); -552 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (disp32)" << end(); +551 addr = next32(); +552 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (disp32)" << end(); 553 break; 554 555 //: 556 557 :(scenario add_r32_to_mem_at_r32_plus_disp8) 558 % Reg[EBX].i = 0x10; // source -559 % Reg[EAX].i = 0x5e; // dest +559 % Reg[EAX].i = 0x1ffe; // dest 560 == 0x1 # code segment 561 # op ModR/M SIB displacement immediate 562 01 58 02 # add EBX to *(EAX+2) 563 # ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 000 (dest EAX) -564 == 0x60 # data segment +564 == 0x2000 # data segment 565 01 00 00 00 # 1 566 +run: add EBX to r/m32 -567 +run: effective address is initially 0x5e (EAX) -568 +run: effective address is 0x60 (after adding disp8) +567 +run: effective address is initially 0x1ffe (EAX) +568 +run: effective address is 0x2000 (after adding disp8) 569 +run: storing 0x00000011 570 571 :(before "End Mod Special-cases(addr)") @@ -637,44 +637,44 @@ if ('onhashchange' in window) { 573 switch (rm) { 574 default: 575 addr = Reg[rm].u; -576 trace(90, 
"run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); +576 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); 577 break; 578 // End Mod 1 Special-cases(addr) 579 } 580 if (addr > 0) { -581 addr += static_cast<int8_t>(next()); -582 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding disp8)" << end(); +581 addr += static_cast<int8_t>(next()); +582 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding disp8)" << end(); 583 } 584 break; 585 586 :(scenario add_r32_to_mem_at_r32_plus_negative_disp8) 587 % Reg[EBX].i = 0x10; // source -588 % Reg[EAX].i = 0x61; // dest +588 % Reg[EAX].i = 0x2001; // dest 589 == 0x1 # code segment 590 # op ModR/M SIB displacement immediate 591 01 58 ff # add EBX to *(EAX-1) 592 # ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 000 (dest EAX) -593 == 0x60 # data segment +593 == 0x2000 # data segment 594 01 00 00 00 # 1 595 +run: add EBX to r/m32 -596 +run: effective address is initially 0x61 (EAX) -597 +run: effective address is 0x60 (after adding disp8) +596 +run: effective address is initially 0x2001 (EAX) +597 +run: effective address is 0x2000 (after adding disp8) 598 +run: storing 0x00000011 599 600 //: 601 602 :(scenario add_r32_to_mem_at_r32_plus_disp32) 603 % Reg[EBX].i = 0x10; // source -604 % Reg[EAX].i = 0x5e; // dest +604 % Reg[EAX].i = 0x1ffe; // dest 605 == 0x1 # code segment 606 # op ModR/M SIB displacement immediate 607 01 98 02 00 00 00 # add EBX to *(EAX+2) 608 # ModR/M in binary: 10 (indirect+disp32 mode) 011 (src EBX) 000 (dest EAX) -609 == 0x60 # data segment +609 == 0x2000 # data segment 610 01 00 00 00 # 1 611 +run: add EBX to r/m32 -612 +run: effective address is initially 0x5e (EAX) -613 +run: effective address is 0x60 (after adding disp32) +612 +run: effective address is initially 0x1ffe (EAX) +613 +run: effective address is 0x2000 
(after adding disp32) 614 +run: storing 0x00000011 615 616 :(before "End Mod Special-cases(addr)") @@ -682,49 +682,49 @@ if ('onhashchange' in window) { 618 switch (rm) { 619 default: 620 addr = Reg[rm].u; -621 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); +621 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); 622 break; 623 // End Mod 2 Special-cases(addr) 624 } 625 if (addr > 0) { -626 addr += next32(); -627 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding disp32)" << end(); +626 addr += next32(); +627 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding disp32)" << end(); 628 } 629 break; 630 631 :(scenario add_r32_to_mem_at_r32_plus_negative_disp32) 632 % Reg[EBX].i = 0x10; // source -633 % Reg[EAX].i = 0x61; // dest +633 % Reg[EAX].i = 0x2001; // dest 634 == 0x1 # code segment 635 # op ModR/M SIB displacement immediate 636 01 98 ff ff ff ff # add EBX to *(EAX-1) 637 # ModR/M in binary: 10 (indirect+disp32 mode) 011 (src EBX) 000 (dest EAX) -638 == 0x60 # data segment +638 == 0x2000 # data segment 639 01 00 00 00 # 1 640 +run: add EBX to r/m32 -641 +run: effective address is initially 0x61 (EAX) -642 +run: effective address is 0x60 (after adding disp32) +641 +run: effective address is initially 0x2001 (EAX) +642 +run: effective address is 0x2000 (after adding disp32) 643 +run: storing 0x00000011 644 645 //:: lea 646 647 :(before "End Initialize Op Names(name)") -648 put(name, "8d", "load effective address of memory in rm32 into r32"); +648 put(name, "8d", "load effective address of memory in rm32 into r32"); 649 650 :(scenario lea) -651 % Reg[EAX].u = 0x60; +651 % Reg[EAX].u = 0x2000; 652 == 0x1 653 # op ModR/M SIB displacement immediate 654 8d 18 655 # ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX) 656 +run: lea into EBX -657 +run: effective 
address is 0x60 (EAX) +657 +run: effective address is 0x2000 (EAX) 658 659 :(before "End Single-Byte Opcodes") 660 case 0x8d: { // lea m32 to r32 -661 uint8_t modrm = next(); +661 uint8_t modrm = next(); 662 uint8_t arg1 = (modrm>>3)&0x7; -663 trace(90, "run") << "lea into " << rname(arg1) << end(); +663 trace(90, "run") << "lea into " << rname(arg1) << end(); 664 Reg[arg1].u = effective_address_number(modrm); 665 break; 666 } diff --git a/html/subx/015immediate_addressing.cc.html b/html/subx/015immediate_addressing.cc.html index 1a1182fc..83ac6f4f 100644 --- a/html/subx/015immediate_addressing.cc.html +++ b/html/subx/015immediate_addressing.cc.html @@ -65,7 +65,7 @@ if ('onhashchange' in window) { 1 //: instructions that (immediately) contain an argument to act with 2 3 :(before "End Initialize Op Names(name)") - 4 put(name, "81", "combine rm32 with imm32 based on subop"); + 4 put(name, "81", "combine rm32 with imm32 based on subop"); 5 6 :(scenario add_imm32_to_r32) 7 % Reg[EBX].i = 1; @@ -81,20 +81,20 @@ if ('onhashchange' in window) { 17 18 :(before "End Single-Byte Opcodes") 19 case 0x81: { // combine imm32 with r/m32 - 20 trace(90, "run") << "combine imm32 with r/m32" << end(); - 21 uint8_t modrm = next(); + 20 trace(90, "run") << "combine imm32 with r/m32" << end(); + 21 uint8_t modrm = next(); 22 int32_t* arg1 = effective_address(modrm); - 23 int32_t arg2 = next32(); - 24 trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end(); + 23 int32_t arg2 = next32(); + 24 trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end(); 25 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits 26 switch (subop) { 27 case 0: - 28 trace(90, "run") << "subop add" << end(); + 28 trace(90, "run") << "subop add" << end(); 29 BINARY_ARITHMETIC_OP(+, *arg1, arg2); 30 break; 31 // End Op 81 Subops 32 default: - 33 cerr << "unrecognized sub-opcode after 81: " << NUM(subop) << '\n'; + 33 cerr << "unrecognized sub-opcode after 81: " << NUM(subop) << '\n'; 34 exit(1); 
35 } 36 break; @@ -103,15 +103,15 @@ if ('onhashchange' in window) { 39 //: 40 41 :(scenario add_imm32_to_mem_at_r32) - 42 % Reg[EBX].i = 0x60; + 42 % Reg[EBX].i = 0x2000; 43 == 0x01 # code segment 44 # op ModR/M SIB displacement immediate 45 81 03 0a 0b 0c 0d # add 0x0d0c0b0a to *EBX 46 # ModR/M in binary: 00 (indirect mode) 000 (add imm32) 011 (dest EBX) - 47 == 0x60 # data segment + 47 == 0x2000 # data segment 48 01 00 00 00 # 1 49 +run: combine imm32 with r/m32 - 50 +run: effective address is 0x60 (EBX) + 50 +run: effective address is 0x2000 (EBX) 51 +run: imm32 is 0x0d0c0b0a 52 +run: subop add 53 +run: storing 0x0d0c0b0b @@ -119,7 +119,7 @@ if ('onhashchange' in window) { 55 //:: subtract 56 57 :(before "End Initialize Op Names(name)") - 58 put(name, "2d", "subtract imm32 from R0 (EAX)"); + 58 put(name, "2d", "subtract imm32 from R0 (EAX)"); 59 60 :(scenario subtract_imm32_from_eax) 61 % Reg[EAX].i = 0x0d0c0baa; @@ -131,8 +131,8 @@ if ('onhashchange' in window) { 67 68 :(before "End Single-Byte Opcodes") 69 case 0x2d: { // subtract imm32 from EAX - 70 int32_t arg2 = next32(); - 71 trace(90, "run") << "subtract imm32 0x" << HEXWORD << arg2 << " from EAX" << end(); + 70 int32_t arg2 = next32(); + 71 trace(90, "run") << "subtract imm32 0x" << HEXWORD << arg2 << " from EAX" << end(); 72 BINARY_ARITHMETIC_OP(-, Reg[EAX].i, arg2); 73 break; 74 } @@ -140,22 +140,22 @@ if ('onhashchange' in window) { 76 //: 77 78 :(scenario subtract_imm32_from_mem_at_r32) - 79 % Reg[EBX].i = 0x60; + 79 % Reg[EBX].i = 0x2000; 80 == 0x01 # code segment 81 # op ModR/M SIB displacement immediate 82 81 2b 01 00 00 00 # subtract 1 from *EBX 83 # ModR/M in binary: 00 (indirect mode) 101 (subtract imm32) 011 (dest EBX) - 84 == 0x60 # data segment + 84 == 0x2000 # data segment 85 0a 00 00 00 # 10 86 +run: combine imm32 with r/m32 - 87 +run: effective address is 0x60 (EBX) + 87 +run: effective address is 0x2000 (EBX) 88 +run: imm32 is 0x00000001 89 +run: subop subtract 90 +run: storing 
0x00000009 91 92 :(before "End Op 81 Subops") 93 case 5: { - 94 trace(90, "run") << "subop subtract" << end(); + 94 trace(90, "run") << "subop subtract" << end(); 95 BINARY_ARITHMETIC_OP(-, *arg1, arg2); 96 break; 97 } @@ -177,7 +177,7 @@ if ('onhashchange' in window) { 113 //:: and 114 115 :(before "End Initialize Op Names(name)") -116 put(name, "25", "R0 = bitwise AND of imm32 with R0 (EAX)"); +116 put(name, "25", "R0 = bitwise AND of imm32 with R0 (EAX)"); 117 118 :(scenario and_imm32_with_eax) 119 % Reg[EAX].i = 0xff; @@ -189,8 +189,8 @@ if ('onhashchange' in window) { 125 126 :(before "End Single-Byte Opcodes") 127 case 0x25: { // and imm32 with EAX -128 int32_t arg2 = next32(); -129 trace(90, "run") << "and imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); +128 int32_t arg2 = next32(); +129 trace(90, "run") << "and imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); 130 BINARY_BITWISE_OP(&, Reg[EAX].i, arg2); 131 break; 132 } @@ -198,22 +198,22 @@ if ('onhashchange' in window) { 134 //: 135 136 :(scenario and_imm32_with_mem_at_r32) -137 % Reg[EBX].i = 0x60; +137 % Reg[EBX].i = 0x2000; 138 == 0x01 # code segment 139 # op ModR/M SIB displacement immediate 140 81 23 0a 0b 0c 0d # and 0x0d0c0b0a with *EBX 141 # ModR/M in binary: 00 (indirect mode) 100 (and imm32) 011 (dest EBX) -142 == 0x60 # data segment +142 == 0x2000 # data segment 143 ff 00 00 00 # 0xff 144 +run: combine imm32 with r/m32 -145 +run: effective address is 0x60 (EBX) +145 +run: effective address is 0x2000 (EBX) 146 +run: imm32 is 0x0d0c0b0a 147 +run: subop and 148 +run: storing 0x0000000a 149 150 :(before "End Op 81 Subops") 151 case 4: { -152 trace(90, "run") << "subop and" << end(); +152 trace(90, "run") << "subop and" << end(); 153 BINARY_BITWISE_OP(&, *arg1, arg2); 154 break; 155 } @@ -235,7 +235,7 @@ if ('onhashchange' in window) { 171 //:: or 172 173 :(before "End Initialize Op Names(name)") -174 put(name, "0d", "R0 = bitwise OR of imm32 with R0 (EAX)"); +174 put(name, "0d", "R0 = 
bitwise OR of imm32 with R0 (EAX)"); 175 176 :(scenario or_imm32_with_eax) 177 % Reg[EAX].i = 0xd0c0b0a0; @@ -247,8 +247,8 @@ if ('onhashchange' in window) { 183 184 :(before "End Single-Byte Opcodes") 185 case 0x0d: { // or imm32 with EAX -186 int32_t arg2 = next32(); -187 trace(90, "run") << "or imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); +186 int32_t arg2 = next32(); +187 trace(90, "run") << "or imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); 188 BINARY_BITWISE_OP(|, Reg[EAX].i, arg2); 189 break; 190 } @@ -256,22 +256,22 @@ if ('onhashchange' in window) { 192 //: 193 194 :(scenario or_imm32_with_mem_at_r32) -195 % Reg[EBX].i = 0x60; +195 % Reg[EBX].i = 0x2000; 196 == 0x01 # code segment 197 # op ModR/M SIB displacement immediate 198 81 0b 0a 0b 0c 0d # or 0x0d0c0b0a with *EBX 199 # ModR/M in binary: 00 (indirect mode) 001 (or imm32) 011 (dest EBX) -200 == 0x60 # data segment +200 == 0x2000 # data segment 201 a0 b0 c0 d0 # 0xd0c0b0a0 202 +run: combine imm32 with r/m32 -203 +run: effective address is 0x60 (EBX) +203 +run: effective address is 0x2000 (EBX) 204 +run: imm32 is 0x0d0c0b0a 205 +run: subop or 206 +run: storing 0xddccbbaa 207 208 :(before "End Op 81 Subops") 209 case 1: { -210 trace(90, "run") << "subop or" << end(); +210 trace(90, "run") << "subop or" << end(); 211 BINARY_BITWISE_OP(|, *arg1, arg2); 212 break; 213 } @@ -291,7 +291,7 @@ if ('onhashchange' in window) { 227 //:: xor 228 229 :(before "End Initialize Op Names(name)") -230 put(name, "35", "R0 = bitwise XOR of imm32 with R0 (EAX)"); +230 put(name, "35", "R0 = bitwise XOR of imm32 with R0 (EAX)"); 231 232 :(scenario xor_imm32_with_eax) 233 % Reg[EAX].i = 0xddccb0a0; @@ -303,8 +303,8 @@ if ('onhashchange' in window) { 239 240 :(before "End Single-Byte Opcodes") 241 case 0x35: { // xor imm32 with EAX -242 int32_t arg2 = next32(); -243 trace(90, "run") << "xor imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); +242 int32_t arg2 = next32(); +243 trace(90, "run") << "xor imm32 0x" 
<< HEXWORD << arg2 << " with EAX" << end(); 244 BINARY_BITWISE_OP(^, Reg[EAX].i, arg2); 245 break; 246 } @@ -312,22 +312,22 @@ if ('onhashchange' in window) { 248 //: 249 250 :(scenario xor_imm32_with_mem_at_r32) -251 % Reg[EBX].i = 0x60; +251 % Reg[EBX].i = 0x2000; 252 == 0x01 # code segment 253 # op ModR/M SIB displacement immediate 254 81 33 0a 0b 0c 0d # xor 0x0d0c0b0a with *EBX 255 # ModR/M in binary: 00 (indirect mode) 110 (xor imm32) 011 (dest EBX) -256 == 0x60 # data segment +256 == 0x2000 # data segment 257 a0 b0 c0 d0 # 0xd0c0b0a0 258 +run: combine imm32 with r/m32 -259 +run: effective address is 0x60 (EBX) +259 +run: effective address is 0x2000 (EBX) 260 +run: imm32 is 0x0d0c0b0a 261 +run: subop xor 262 +run: storing 0xddccbbaa 263 264 :(before "End Op 81 Subops") 265 case 6: { -266 trace(90, "run") << "subop xor" << end(); +266 trace(90, "run") << "subop xor" << end(); 267 BINARY_BITWISE_OP(^, *arg1, arg2); 268 break; 269 } @@ -347,7 +347,7 @@ if ('onhashchange' in window) { 283 //:: compare (cmp) 284 285 :(before "End Initialize Op Names(name)") -286 put(name, "3d", "compare: set SF if R0 < imm32"); +286 put(name, "3d", "compare: set SF if R0 < imm32"); 287 288 :(scenario compare_imm32_with_eax_greater) 289 % Reg[EAX].i = 0x0d0c0b0a; @@ -360,14 +360,14 @@ if ('onhashchange' in window) { 296 :(before "End Single-Byte Opcodes") 297 case 0x3d: { // compare EAX with imm32 298 int32_t arg1 = Reg[EAX].i; -299 int32_t arg2 = next32(); -300 trace(90, "run") << "compare EAX and imm32 0x" << HEXWORD << arg2 << end(); +299 int32_t arg2 = next32(); +300 trace(90, "run") << "compare EAX and imm32 0x" << HEXWORD << arg2 << end(); 301 int32_t tmp1 = arg1 - arg2; 302 SF = (tmp1 < 0); 303 ZF = (tmp1 == 0); 304 int64_t tmp2 = arg1 - arg2; 305 OF = (tmp1 != tmp2); -306 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); +306 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); 307 break; 308 } 309 @@ -402,13 +402,13 @@ if 
('onhashchange' in window) { 338 339 :(before "End Op 81 Subops") 340 case 7: { -341 trace(90, "run") << "subop compare" << end(); +341 trace(90, "run") << "subop compare" << end(); 342 int32_t tmp1 = *arg1 - arg2; 343 SF = (tmp1 < 0); 344 ZF = (tmp1 == 0); 345 int64_t tmp2 = *arg1 - arg2; 346 OF = (tmp1 != tmp2); -347 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); +347 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); 348 break; 349 } 350 @@ -435,56 +435,56 @@ if ('onhashchange' in window) { 371 +run: SF=0; ZF=1; OF=0 372 373 :(scenario compare_imm32_with_mem_at_r32_greater) -374 % Reg[EBX].i = 0x60; +374 % Reg[EBX].i = 0x2000; 375 == 0x01 # code segment 376 # op ModR/M SIB displacement immediate 377 81 3b 07 0b 0c 0d # compare 0x0d0c0b07 with *EBX 378 # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX) -379 == 0x60 # data segment +379 == 0x2000 # data segment 380 0a 0b 0c 0d # 0x0d0c0b0a 381 +run: combine imm32 with r/m32 -382 +run: effective address is 0x60 (EBX) +382 +run: effective address is 0x2000 (EBX) 383 +run: imm32 is 0x0d0c0b07 384 +run: SF=0; ZF=0; OF=0 385 386 :(scenario compare_imm32_with_mem_at_r32_lesser) -387 % Reg[EBX].i = 0x60; +387 % Reg[EBX].i = 0x2000; 388 == 0x01 # code segment 389 # op ModR/M SIB displacement immediate 390 81 3b 0a 0b 0c 0d # compare 0x0d0c0b0a with *EBX 391 # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX) -392 == 0x60 # data segment +392 == 0x2000 # data segment 393 07 0b 0c 0d # 0x0d0c0b07 394 +run: combine imm32 with r/m32 -395 +run: effective address is 0x60 (EBX) +395 +run: effective address is 0x2000 (EBX) 396 +run: imm32 is 0x0d0c0b0a 397 +run: SF=1; ZF=0; OF=0 398 399 :(scenario compare_imm32_with_mem_at_r32_equal) 400 % Reg[EBX].i = 0x0d0c0b0a; -401 % Reg[EBX].i = 0x60; +401 % Reg[EBX].i = 0x2000; 402 == 0x01 # code segment 403 # op ModR/M SIB displacement immediate 404 81 3b 0a 0b 0c 0d # compare 0x0d0c0b0a with 
*EBX 405 # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX) -406 == 0x60 # data segment +406 == 0x2000 # data segment 407 0a 0b 0c 0d # 0x0d0c0b0a 408 +run: combine imm32 with r/m32 -409 +run: effective address is 0x60 (EBX) +409 +run: effective address is 0x2000 (EBX) 410 +run: imm32 is 0x0d0c0b0a 411 +run: SF=0; ZF=1; OF=0 412 413 //:: copy (mov) 414 415 :(before "End Initialize Op Names(name)") -416 put(name, "b8", "copy imm32 to R0 (EAX)"); -417 put(name, "b9", "copy imm32 to R1 (ECX)"); -418 put(name, "ba", "copy imm32 to R2 (EDX)"); -419 put(name, "bb", "copy imm32 to R3 (EBX)"); -420 put(name, "bc", "copy imm32 to R4 (ESP)"); -421 put(name, "bd", "copy imm32 to R5 (EBP)"); -422 put(name, "be", "copy imm32 to R6 (ESI)"); -423 put(name, "bf", "copy imm32 to R7 (EDI)"); +416 put(name, "b8", "copy imm32 to R0 (EAX)"); +417 put(name, "b9", "copy imm32 to R1 (ECX)"); +418 put(name, "ba", "copy imm32 to R2 (EDX)"); +419 put(name, "bb", "copy imm32 to R3 (EBX)"); +420 put(name, "bc", "copy imm32 to R4 (ESP)"); +421 put(name, "bd", "copy imm32 to R5 (EBP)"); +422 put(name, "be", "copy imm32 to R6 (ESI)"); +423 put(name, "bf", "copy imm32 to R7 (EDI)"); 424 425 :(scenario copy_imm32_to_r32) 426 == 0x1 @@ -502,8 +502,8 @@ if ('onhashchange' in window) { 438 case 0xbe: 439 case 0xbf: { // copy imm32 to r32 440 uint8_t reg1 = op & 0x7; -441 int32_t arg2 = next32(); -442 trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to " << rname(reg1) << end(); +441 int32_t arg2 = next32(); +442 trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to " << rname(reg1) << end(); 443 Reg[reg1].i = arg2; 444 break; 445 } @@ -511,7 +511,7 @@ if ('onhashchange' in window) { 447 //: 448 449 :(before "End Initialize Op Names(name)") -450 put(name, "c7", "copy imm32 to rm32"); +450 put(name, "c7", "copy imm32 to rm32"); 451 452 :(scenario copy_imm32_to_mem_at_r32) 453 % Reg[EBX].i = 0x60; @@ -525,11 +525,11 @@ if ('onhashchange' in window) { 461 462 
:(before "End Single-Byte Opcodes") 463 case 0xc7: { // copy imm32 to r32 -464 uint8_t modrm = next(); -465 trace(90, "run") << "copy imm32 to r/m32" << end(); +464 uint8_t modrm = next(); +465 trace(90, "run") << "copy imm32 to r/m32" << end(); 466 int32_t* arg1 = effective_address(modrm); -467 int32_t arg2 = next32(); -468 trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end(); +467 int32_t arg2 = next32(); +468 trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end(); 469 *arg1 = arg2; 470 break; 471 } @@ -537,25 +537,25 @@ if ('onhashchange' in window) { 473 //:: push 474 475 :(before "End Initialize Op Names(name)") -476 put(name, "68", "push imm32 to stack"); +476 put(name, "68", "push imm32 to stack"); 477 478 :(scenario push_imm32) 479 % Reg[ESP].u = 0x14; 480 == 0x1 481 # op ModR/M SIB displacement immediate 482 68 af 00 00 00 # push *EAX to stack -483 +run: push imm32 0x000000af +483 +run: push imm32 0x000000af 484 +run: ESP is now 0x00000010 485 +run: contents at ESP: 0x000000af 486 487 :(before "End Single-Byte Opcodes") 488 case 0x68: { -489 uint32_t val = static_cast<uint32_t>(next32()); -490 trace(90, "run") << "push imm32 0x" << HEXWORD << val << end(); +489 uint32_t val = static_cast<uint32_t>(next32()); +490 trace(90, "run") << "push imm32 0x" << HEXWORD << val << end(); 491 //? 
cerr << "push: " << val << " => " << Reg[ESP].u << '\n'; -492 push(val); -493 trace(90, "run") << "ESP is now 0x" << HEXWORD << Reg[ESP].u << end(); -494 trace(90, "run") << "contents at ESP: 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << end(); +492 push(val); +493 trace(90, "run") << "ESP is now 0x" << HEXWORD << Reg[ESP].u << end(); +494 trace(90, "run") << "contents at ESP: 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << end(); 495 break; 496 } diff --git a/html/subx/016index_addressing.cc.html b/html/subx/016index_addressing.cc.html index 74d4d91d..f5801589 100644 --- a/html/subx/016index_addressing.cc.html +++ b/html/subx/016index_addressing.cc.html @@ -64,17 +64,17 @@ if ('onhashchange' in window) { 2 3 :(scenario add_r32_to_mem_at_r32_with_sib) 4 % Reg[EBX].i = 0x10; - 5 % Reg[EAX].i = 0x60; + 5 % Reg[EAX].i = 0x2000; 6 == 0x1 # code segment 7 # op ModR/M SIB displacement immediate 8 01 1c 20 # add EBX to *EAX 9 # ModR/M in binary: 00 (indirect mode) 011 (src EBX) 100 (dest in SIB) 10 # SIB in binary: 00 (scale 1) 100 (no index) 000 (base EAX) - 11 == 0x60 # data segment + 11 == 0x2000 # data segment 12 01 00 00 00 # 1 13 +run: add EBX to r/m32 - 14 +run: effective address is initially 0x60 (EAX) - 15 +run: effective address is 0x60 + 14 +run: effective address is initially 0x2000 (EAX) + 15 +run: effective address is 0x2000 16 +run: storing 0x00000011 17 18 :(before "End Mod 0 Special-cases(addr)") @@ -83,78 +83,78 @@ if ('onhashchange' in window) { 21 break; 22 :(code) 23 uint32_t effective_address_from_sib(uint8_t mod) { - 24 uint8_t sib = next(); + 24 uint8_t sib = next(); 25 uint8_t base = sib&0x7; 26 uint32_t addr = 0; 27 if (base != EBP || mod != 0) { 28 addr = Reg[base].u; - 29 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(base) << ")" << end(); + 29 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(base) << ")" << end(); 30 } 31 else { 32 // base == EBP && mod == 
0 - 33 addr = next32(); // ignore base - 34 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (disp32)" << end(); + 33 addr = next32(); // ignore base + 34 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (disp32)" << end(); 35 } 36 uint8_t index = (sib>>3)&0x7; 37 if (index == ESP) { 38 // ignore index and scale - 39 trace(90, "run") << "effective address is 0x" << std::hex << addr << end(); + 39 trace(90, "run") << "effective address is 0x" << std::hex << addr << end(); 40 } 41 else { 42 uint8_t scale = (1 << (sib>>6)); 43 addr += Reg[index].i*scale; // treat index register as signed. Maybe base as well? But we'll always ensure it's non-negative. - 44 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding " << rname(index) << "*" << NUM(scale) << ")" << end(); + 44 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding " << rname(index) << "*" << NUM(scale) << ")" << end(); 45 } 46 return addr; 47 } 48 49 :(scenario add_r32_to_mem_at_base_r32_index_r32) 50 % Reg[EBX].i = 0x10; // source - 51 % Reg[EAX].i = 0x5e; // dest base + 51 % Reg[EAX].i = 0x1ffe; // dest base 52 % Reg[ECX].i = 0x2; // dest index 53 == 0x1 # code segment 54 # op ModR/M SIB displacement immediate 55 01 1c 08 # add EBX to *(EAX+ECX) 56 # ModR/M in binary: 00 (indirect mode) 011 (src EBX) 100 (dest in SIB) 57 # SIB in binary: 00 (scale 1) 001 (index ECX) 000 (base EAX) - 58 == 0x60 # data segment + 58 == 0x2000 # data segment 59 01 00 00 00 # 1 60 +run: add EBX to r/m32 - 61 +run: effective address is initially 0x5e (EAX) - 62 +run: effective address is 0x60 (after adding ECX*1) + 61 +run: effective address is initially 0x1ffe (EAX) + 62 +run: effective address is 0x2000 (after adding ECX*1) 63 +run: storing 0x00000011 64 65 :(scenario add_r32_to_mem_at_displacement_using_sib) 66 % Reg[EBX].i = 0x10; // source 67 == 0x1 # code segment 68 # op ModR/M SIB displacement 
immediate - 69 01 1c 25 60 00 00 00 # add EBX to *0x60 + 69 01 1c 25 00 20 00 00 # add EBX to *0x2000 70 # ModR/M in binary: 00 (indirect mode) 011 (src EBX) 100 (dest in SIB) 71 # SIB in binary: 00 (scale 1) 100 (no index) 101 (not EBP but disp32) - 72 == 0x60 # data segment + 72 == 0x2000 # data segment 73 01 00 00 00 # 1 74 +run: add EBX to r/m32 - 75 +run: effective address is initially 0x60 (disp32) - 76 +run: effective address is 0x60 + 75 +run: effective address is initially 0x2000 (disp32) + 76 +run: effective address is 0x2000 77 +run: storing 0x00000011 78 79 //: 80 81 :(scenario add_r32_to_mem_at_base_r32_index_r32_plus_disp8) 82 % Reg[EBX].i = 0x10; // source - 83 % Reg[EAX].i = 0x59; // dest base + 83 % Reg[EAX].i = 0x1ff9; // dest base 84 % Reg[ECX].i = 0x5; // dest index 85 == 0x1 # code segment 86 # op ModR/M SIB displacement immediate 87 01 5c 08 02 # add EBX to *(EAX+ECX+2) 88 # ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 100 (dest in SIB) 89 # SIB in binary: 00 (scale 1) 001 (index ECX) 000 (base EAX) - 90 == 0x60 # data segment + 90 == 0x2000 # data segment 91 01 00 00 00 # 1 92 +run: add EBX to r/m32 - 93 +run: effective address is initially 0x59 (EAX) - 94 +run: effective address is 0x5e (after adding ECX*1) - 95 +run: effective address is 0x60 (after adding disp8) + 93 +run: effective address is initially 0x1ff9 (EAX) + 94 +run: effective address is 0x1ffe (after adding ECX*1) + 95 +run: effective address is 0x2000 (after adding disp8) 96 +run: storing 0x00000011 97 98 :(before "End Mod 1 Special-cases(addr)") @@ -166,19 +166,19 @@ if ('onhashchange' in window) { 104 105 :(scenario add_r32_to_mem_at_base_r32_index_r32_plus_disp32) 106 % Reg[EBX].i = 0x10; // source -107 % Reg[EAX].i = 0x59; // dest base +107 % Reg[EAX].i = 0x1ff9; // dest base 108 % Reg[ECX].i = 0x5; // dest index 109 == 0x1 # code segment 110 # op ModR/M SIB displacement immediate 111 01 9c 08 02 00 00 00 # add EBX to *(EAX+ECX+2) 112 # ModR/M in binary: 10 
(indirect+disp32 mode) 011 (src EBX) 100 (dest in SIB) 113 # SIB in binary: 00 (scale 1) 001 (index ECX) 000 (base EAX) -114 == 0x60 # data segment +114 == 0x2000 # data segment 115 01 00 00 00 # 1 116 +run: add EBX to r/m32 -117 +run: effective address is initially 0x59 (EAX) -118 +run: effective address is 0x5e (after adding ECX*1) -119 +run: effective address is 0x60 (after adding disp32) +117 +run: effective address is initially 0x1ff9 (EAX) +118 +run: effective address is 0x1ffe (after adding ECX*1) +119 +run: effective address is 0x2000 (after adding disp32) 120 +run: storing 0x00000011 121 122 :(before "End Mod 2 Special-cases(addr)") diff --git a/html/subx/017jump_disp8.cc.html b/html/subx/017jump_disp8.cc.html index bc581bd6..a29779b1 100644 --- a/html/subx/017jump_disp8.cc.html +++ b/html/subx/017jump_disp8.cc.html @@ -66,7 +66,7 @@ if ('onhashchange' in window) { 3 //:: jump 4 5 :(before "End Initialize Op Names(name)") - 6 put(name, "eb", "jump disp8 bytes away"); + 6 put(name, "eb", "jump disp8 bytes away"); 7 8 :(scenario jump_rel8) 9 == 0x1 @@ -81,8 +81,8 @@ if ('onhashchange' in window) { 18 19 :(before "End Single-Byte Opcodes") 20 case 0xeb: { // jump rel8 - 21 int8_t offset = static_cast<int>(next()); - 22 trace(90, "run") << "jump " << NUM(offset) << end(); + 21 int8_t offset = static_cast<int>(next()); + 22 trace(90, "run") << "jump " << NUM(offset) << end(); 23 EIP += offset; 24 break; 25 } @@ -90,7 +90,7 @@ if ('onhashchange' in window) { 27 //:: jump if equal/zero 28 29 :(before "End Initialize Op Names(name)") - 30 put(name, "74", "jump disp8 bytes away if ZF is set"); + 30 put(name, "74", "jump disp8 bytes away if ZF is set"); 31 32 :(scenario je_rel8_success) 33 % ZF = true; @@ -106,9 +106,9 @@ if ('onhashchange' in window) { 43 44 :(before "End Single-Byte Opcodes") 45 case 0x74: { // jump rel8 if ZF - 46 int8_t offset = static_cast<int>(next()); + 46 int8_t offset = static_cast<int>(next()); 47 if (ZF) { - 48 trace(90, "run") << "jump " 
<< NUM(offset) << end(); + 48 trace(90, "run") << "jump " << NUM(offset) << end(); 49 EIP += offset; 50 } 51 break; @@ -129,7 +129,7 @@ if ('onhashchange' in window) { 66 //:: jump if not equal/not zero 67 68 :(before "End Initialize Op Names(name)") - 69 put(name, "75", "jump disp8 bytes away if ZF is not set"); + 69 put(name, "75", "jump disp8 bytes away if ZF is not set"); 70 71 :(scenario jne_rel8_success) 72 % ZF = false; @@ -145,9 +145,9 @@ if ('onhashchange' in window) { 82 83 :(before "End Single-Byte Opcodes") 84 case 0x75: { // jump rel8 unless ZF - 85 int8_t offset = static_cast<int>(next()); + 85 int8_t offset = static_cast<int>(next()); 86 if (!ZF) { - 87 trace(90, "run") << "jump " << NUM(offset) << end(); + 87 trace(90, "run") << "jump " << NUM(offset) << end(); 88 EIP += offset; 89 } 90 break; @@ -168,7 +168,7 @@ if ('onhashchange' in window) { 105 //:: jump if greater 106 107 :(before "End Initialize Op Names(name)") -108 put(name, "7f", "jump disp8 bytes away if greater (ZF is unset, SF == OF)"); +108 put(name, "7f", "jump disp8 bytes away if greater (ZF is unset, SF == OF)"); 109 110 :(scenario jg_rel8_success) 111 % ZF = false; @@ -186,9 +186,9 @@ if ('onhashchange' in window) { 123 124 :(before "End Single-Byte Opcodes") 125 case 0x7f: { // jump rel8 if !SF and !ZF -126 int8_t offset = static_cast<int>(next()); +126 int8_t offset = static_cast<int>(next()); 127 if (!ZF && SF == OF) { -128 trace(90, "run") << "jump " << NUM(offset) << end(); +128 trace(90, "run") << "jump " << NUM(offset) << end(); 129 EIP += offset; 130 } 131 break; @@ -211,7 +211,7 @@ if ('onhashchange' in window) { 148 //:: jump if greater or equal 149 150 :(before "End Initialize Op Names(name)") -151 put(name, "7d", "jump disp8 bytes away if greater or equal (SF == OF)"); +151 put(name, "7d", "jump disp8 bytes away if greater or equal (SF == OF)"); 152 153 :(scenario jge_rel8_success) 154 % SF = false; @@ -228,9 +228,9 @@ if ('onhashchange' in window) { 165 166 :(before 
"End Single-Byte Opcodes") 167 case 0x7d: { // jump rel8 if !SF -168 int8_t offset = static_cast<int>(next()); +168 int8_t offset = static_cast<int>(next()); 169 if (SF == OF) { -170 trace(90, "run") << "jump " << NUM(offset) << end(); +170 trace(90, "run") << "jump " << NUM(offset) << end(); 171 EIP += offset; 172 } 173 break; @@ -252,7 +252,7 @@ if ('onhashchange' in window) { 189 //:: jump if lesser 190 191 :(before "End Initialize Op Names(name)") -192 put(name, "7c", "jump disp8 bytes away if lesser (SF != OF)"); +192 put(name, "7c", "jump disp8 bytes away if lesser (SF != OF)"); 193 194 :(scenario jl_rel8_success) 195 % ZF = false; @@ -270,9 +270,9 @@ if ('onhashchange' in window) { 207 208 :(before "End Single-Byte Opcodes") 209 case 0x7c: { // jump rel8 if SF and !ZF -210 int8_t offset = static_cast<int>(next()); +210 int8_t offset = static_cast<int>(next()); 211 if (SF != OF) { -212 trace(90, "run") << "jump " << NUM(offset) << end(); +212 trace(90, "run") << "jump " << NUM(offset) << end(); 213 EIP += offset; 214 } 215 break; @@ -295,7 +295,7 @@ if ('onhashchange' in window) { 232 //:: jump if lesser or equal 233 234 :(before "End Initialize Op Names(name)") -235 put(name, "7e", "jump disp8 bytes away if lesser or equal (ZF is set or SF != OF)"); +235 put(name, "7e", "jump disp8 bytes away if lesser or equal (ZF is set or SF != OF)"); 236 237 :(scenario jle_rel8_equal) 238 % ZF = true; @@ -327,9 +327,9 @@ if ('onhashchange' in window) { 264 265 :(before "End Single-Byte Opcodes") 266 case 0x7e: { // jump rel8 if SF or ZF -267 int8_t offset = static_cast<int>(next()); +267 int8_t offset = static_cast<int>(next()); 268 if (ZF || SF != OF) { -269 trace(90, "run") << "jump " << NUM(offset) << end(); +269 trace(90, "run") << "jump " << NUM(offset) << end(); 270 EIP += offset; 271 } 272 break; diff --git a/html/subx/018jump_disp16.cc.html b/html/subx/018jump_disp16.cc.html index 2cbbac02..050d71eb 100644 --- a/html/subx/018jump_disp16.cc.html +++ 
b/html/subx/018jump_disp16.cc.html @@ -66,7 +66,7 @@ if ('onhashchange' in window) { 3 //:: jump 4 5 :(before "End Initialize Op Names(name)") - 6 put(name, "e9", "jump disp16 bytes away"); + 6 put(name, "e9", "jump disp16 bytes away"); 7 8 :(scenario jump_rel16) 9 == 0x1 @@ -82,21 +82,21 @@ if ('onhashchange' in window) { 19 :(before "End Single-Byte Opcodes") 20 case 0xe9: { // jump rel8 21 int16_t offset = imm16(); - 22 trace(90, "run") << "jump " << offset << end(); + 22 trace(90, "run") << "jump " << offset << end(); 23 EIP += offset; 24 break; 25 } 26 :(code) 27 int16_t imm16() { - 28 int16_t result = next(); - 29 result |= (next()<<8); + 28 int16_t result = next(); + 29 result |= (next()<<8); 30 return result; 31 } 32 33 //:: jump if equal/zero 34 35 :(before "End Initialize Op Names(name)") - 36 put(name_0f, "84", "jump disp16 bytes away if ZF is set"); + 36 put(name_0f, "84", "jump disp16 bytes away if ZF is set"); 37 38 :(scenario je_rel16_success) 39 % ZF = true; @@ -114,7 +114,7 @@ if ('onhashchange' in window) { 51 case 0x84: { // jump rel16 if ZF 52 int8_t offset = imm16(); 53 if (ZF) { - 54 trace(90, "run") << "jump " << NUM(offset) << end(); + 54 trace(90, "run") << "jump " << NUM(offset) << end(); 55 EIP += offset; 56 } 57 break; @@ -135,7 +135,7 @@ if ('onhashchange' in window) { 72 //:: jump if not equal/not zero 73 74 :(before "End Initialize Op Names(name)") - 75 put(name_0f, "85", "jump disp16 bytes away if ZF is not set"); + 75 put(name_0f, "85", "jump disp16 bytes away if ZF is not set"); 76 77 :(scenario jne_rel16_success) 78 % ZF = false; @@ -153,7 +153,7 @@ if ('onhashchange' in window) { 90 case 0x85: { // jump rel16 unless ZF 91 int8_t offset = imm16(); 92 if (!ZF) { - 93 trace(90, "run") << "jump " << NUM(offset) << end(); + 93 trace(90, "run") << "jump " << NUM(offset) << end(); 94 EIP += offset; 95 } 96 break; @@ -174,7 +174,7 @@ if ('onhashchange' in window) { 111 //:: jump if greater 112 113 :(before "End Initialize Op 
Names(name)") -114 put(name_0f, "8f", "jump disp16 bytes away if greater (ZF is unset, SF == OF)"); +114 put(name_0f, "8f", "jump disp16 bytes away if greater (ZF is unset, SF == OF)"); 115 116 :(scenario jg_rel16_success) 117 % ZF = false; @@ -194,7 +194,7 @@ if ('onhashchange' in window) { 131 case 0x8f: { // jump rel16 if !SF and !ZF 132 int8_t offset = imm16(); 133 if (!ZF && SF == OF) { -134 trace(90, "run") << "jump " << NUM(offset) << end(); +134 trace(90, "run") << "jump " << NUM(offset) << end(); 135 EIP += offset; 136 } 137 break; @@ -217,7 +217,7 @@ if ('onhashchange' in window) { 154 //:: jump if greater or equal 155 156 :(before "End Initialize Op Names(name)") -157 put(name_0f, "8d", "jump disp16 bytes away if greater or equal (SF == OF)"); +157 put(name_0f, "8d", "jump disp16 bytes away if greater or equal (SF == OF)"); 158 159 :(scenario jge_rel16_success) 160 % SF = false; @@ -236,7 +236,7 @@ if ('onhashchange' in window) { 173 case 0x8d: { // jump rel16 if !SF 174 int8_t offset = imm16(); 175 if (SF == OF) { -176 trace(90, "run") << "jump " << NUM(offset) << end(); +176 trace(90, "run") << "jump " << NUM(offset) << end(); 177 EIP += offset; 178 } 179 break; @@ -258,7 +258,7 @@ if ('onhashchange' in window) { 195 //:: jump if lesser 196 197 :(before "End Initialize Op Names(name)") -198 put(name_0f, "8c", "jump disp16 bytes away if lesser (SF != OF)"); +198 put(name_0f, "8c", "jump disp16 bytes away if lesser (SF != OF)"); 199 200 :(scenario jl_rel16_success) 201 % ZF = false; @@ -278,7 +278,7 @@ if ('onhashchange' in window) { 215 case 0x8c: { // jump rel16 if SF and !ZF 216 int8_t offset = imm16(); 217 if (SF != OF) { -218 trace(90, "run") << "jump " << NUM(offset) << end(); +218 trace(90, "run") << "jump " << NUM(offset) << end(); 219 EIP += offset; 220 } 221 break; @@ -301,7 +301,7 @@ if ('onhashchange' in window) { 238 //:: jump if lesser or equal 239 240 :(before "End Initialize Op Names(name)") -241 put(name_0f, "8e", "jump disp16 bytes away 
if lesser or equal (ZF is set or SF != OF)"); +241 put(name_0f, "8e", "jump disp16 bytes away if lesser or equal (ZF is set or SF != OF)"); 242 243 :(scenario jle_rel16_equal) 244 % ZF = true; @@ -335,7 +335,7 @@ if ('onhashchange' in window) { 272 case 0x8e: { // jump rel16 if SF or ZF 273 int8_t offset = imm16(); 274 if (ZF || SF != OF) { -275 trace(90, "run") << "jump " << NUM(offset) << end(); +275 trace(90, "run") << "jump " << NUM(offset) << end(); 276 EIP += offset; 277 } 278 break; diff --git a/html/subx/019functions.cc.html b/html/subx/019functions.cc.html index c62d79bd..5af61806 100644 --- a/html/subx/019functions.cc.html +++ b/html/subx/019functions.cc.html @@ -64,7 +64,7 @@ if ('onhashchange' in window) { 1 //:: call 2 3 :(before "End Initialize Op Names(name)") - 4 put(name, "e8", "call disp32"); + 4 put(name, "e8", "call disp32"); 5 6 :(scenario call_disp32) 7 % Reg[ESP].u = 0x64; @@ -79,12 +79,12 @@ if ('onhashchange' in window) { 16 17 :(before "End Single-Byte Opcodes") 18 case 0xe8: { // call disp32 relative to next EIP -19 int32_t offset = next32(); -20 trace(90, "run") << "call imm32 0x" << HEXWORD << offset << end(); +19 int32_t offset = next32(); +20 trace(90, "run") << "call imm32 0x" << HEXWORD << offset << end(); 21 //? 
cerr << "push: EIP: " << EIP << " => " << Reg[ESP].u << '\n'; -22 push(EIP); +22 push(EIP); 23 EIP += offset; -24 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); +24 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); 25 break; 26 } 27 @@ -105,25 +105,25 @@ if ('onhashchange' in window) { 42 43 :(before "End Op ff Subops") 44 case 2: { // call function pointer at r/m32 -45 trace(90, "run") << "call to r/m32" << end(); +45 trace(90, "run") << "call to r/m32" << end(); 46 int32_t* offset = effective_address(modrm); -47 push(EIP); +47 push(EIP); 48 EIP += *offset; -49 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); +49 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); 50 break; 51 } 52 53 :(scenario call_mem_at_r32) 54 % Reg[ESP].u = 0x64; -55 % Reg[EBX].u = 0x10; +55 % Reg[EBX].u = 0x2000; 56 == 0x1 # code segment 57 # op ModR/M SIB displacement immediate 58 ff 13 # call function offset at *EBX 59 # next EIP is 3 -60 == 0x10 # data segment +60 == 0x2000 # data segment 61 a0 00 00 00 # 0xa0 62 +run: call to r/m32 -63 +run: effective address is 0x10 (EBX) +63 +run: effective address is 0x2000 (EBX) 64 +run: decrementing ESP to 0x00000060 65 +run: pushing value 0x00000003 66 +run: jumping to 0x000000a3 @@ -131,14 +131,14 @@ if ('onhashchange' in window) { 68 //:: ret 69 70 :(before "End Initialize Op Names(name)") -71 put(name, "c3", "return from most recent unfinished call"); +71 put(name, "c3", "return from most recent unfinished call"); 72 73 :(scenario ret) -74 % Reg[ESP].u = 0x60; +74 % Reg[ESP].u = 0x2000; 75 == 0x1 # code segment 76 # op ModR/M SIB displacement immediate 77 c3 -78 == 0x60 # data segment +78 == 0x2000 # data segment 79 10 00 00 00 # 0x10 80 +run: return 81 +run: popping value 0x00000010 @@ -146,9 +146,9 @@ if ('onhashchange' in window) { 83 84 :(before "End Single-Byte Opcodes") 85 case 0xc3: { // return from a call -86 trace(90, "run") << "return" << end(); -87 EIP = pop(); -88 trace(90, 
"run") << "jumping to 0x" << HEXWORD << EIP << end(); +86 trace(90, "run") << "return" << end(); +87 EIP = pop(); +88 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); 89 break; 90 } diff --git a/html/subx/020syscalls.cc.html b/html/subx/020syscalls.cc.html index 79ba35b2..33249163 100644 --- a/html/subx/020syscalls.cc.html +++ b/html/subx/020syscalls.cc.html @@ -59,15 +59,15 @@ if ('onhashchange' in window) {
   1 :(before "End Initialize Op Names(name)")
-  2 put(name, "cd", "software interrupt");
+  2 put(name, "cd", "software interrupt");
   3 
   4 :(before "End Single-Byte Opcodes")
   5 case 0xcd: {  // int imm8 (software interrupt)
-  6   trace(90, "run") << "syscall" << end();
-  7   uint8_t code = next();
+  6   trace(90, "run") << "syscall" << end();
+  7   uint8_t code = next();
   8   if (code != 0x80) {
-  9     raise << "Unimplemented interrupt code " << HEXBYTE << code << '\n' << end();
- 10     raise << "  Only `int 80h` supported for now.\n" << end();
+  9     raise << "Unimplemented interrupt code " << HEXBYTE << code << '\n' << end();
+ 10     raise << "  Only `int 80h` supported for now.\n" << end();
  11     break;
  12   }
  13   process_int80();
@@ -81,100 +81,105 @@ if ('onhashchange' in window) {
  21     exit(/*exit code*/Reg[EBX].u);
  22     break;
  23   case 3:
- 24     trace(91, "run") << "read: " << Reg[EBX].u << ' ' << Reg[ECX].u << ' ' << Reg[EDX].u << end();
- 25     trace(91, "run") << Reg[ECX].u << " => " << mem_addr_string(Reg[ECX].u) << end();
- 26     Reg[EAX].i = read(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
- 27     trace(91, "run") << "result: " << Reg[EAX].i << end();
- 28     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 24     trace(91, "run") << "read: " << Reg[EBX].u << ' ' << Reg[ECX].u << ' ' << Reg[EDX].u << end();
+ 25     trace(91, "run") << Reg[ECX].u << " => " << mem_addr_string(Reg[ECX].u) << end();
+ 26     Reg[EAX].i = read(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
+ 27     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 28     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
  29     break;
  30   case 4:
- 31     trace(91, "run") << "write: " << Reg[EBX].u << ' ' << Reg[ECX].u << ' ' << Reg[EDX].u << end();
- 32     trace(91, "run") << Reg[ECX].u << " => " << mem_addr_string(Reg[ECX].u) << end();
- 33     Reg[EAX].i = write(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
- 34     trace(91, "run") << "result: " << Reg[EAX].i << end();
- 35     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 31     trace(91, "run") << "write: " << Reg[EBX].u << ' ' << Reg[ECX].u << ' ' << Reg[EDX].u << end();
+ 32     trace(91, "run") << Reg[ECX].u << " => " << mem_addr_string(Reg[ECX].u) << end();
+ 33     Reg[EAX].i = write(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
+ 34     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 35     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
  36     break;
  37   case 5: {
- 38     check_flags(ECX);
- 39     check_mode(EDX);
- 40     trace(91, "run") << "open: " << Reg[EBX].u << ' ' << Reg[ECX].u << end();
- 41     trace(91, "run") << Reg[EBX].u << " => " << mem_addr_string(Reg[EBX].u) << end();
- 42     Reg[EAX].i = open(/*filename*/mem_addr_string(Reg[EBX].u), /*flags*/Reg[ECX].u, /*mode*/0640);
- 43     trace(91, "run") << "result: " << Reg[EAX].i << end();
- 44     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 38     check_flags(ECX);
+ 39     check_mode(EDX);
+ 40     trace(91, "run") << "open: " << Reg[EBX].u << ' ' << Reg[ECX].u << end();
+ 41     trace(91, "run") << Reg[EBX].u << " => " << mem_addr_string(Reg[EBX].u) << end();
+ 42     Reg[EAX].i = open(/*filename*/mem_addr_string(Reg[EBX].u), /*flags*/Reg[ECX].u, /*mode*/0640);
+ 43     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 44     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
  45     break;
  46   }
  47   case 6:
- 48     trace(91, "run") << "close: " << Reg[EBX].u << end();
+ 48     trace(91, "run") << "close: " << Reg[EBX].u << end();
  49     Reg[EAX].i = close(/*file descriptor*/Reg[EBX].u);
- 50     trace(91, "run") << "result: " << Reg[EAX].i << end();
- 51     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 50     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 51     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
  52     break;
  53   case 8:
- 54     check_mode(ECX);
- 55     trace(91, "run") << "creat: " << Reg[EBX].u << end();
- 56     trace(91, "run") << Reg[EBX].u << " => " << mem_addr_string(Reg[EBX].u) << end();
- 57     Reg[EAX].i = creat(/*filename*/mem_addr_string(Reg[EBX].u), /*mode*/0640);
- 58     trace(91, "run") << "result: " << Reg[EAX].i << end();
- 59     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 54     check_mode(ECX);
+ 55     trace(91, "run") << "creat: " << Reg[EBX].u << end();
+ 56     trace(91, "run") << Reg[EBX].u << " => " << mem_addr_string(Reg[EBX].u) << end();
+ 57     Reg[EAX].i = creat(/*filename*/mem_addr_string(Reg[EBX].u), /*mode*/0640);
+ 58     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 59     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
  60     break;
  61   case 10:
- 62     trace(91, "run") << "unlink: " << Reg[EBX].u << end();
- 63     trace(91, "run") << Reg[EBX].u << " => " << mem_addr_string(Reg[EBX].u) << end();
- 64     Reg[EAX].i = unlink(/*filename*/mem_addr_string(Reg[EBX].u));
- 65     trace(91, "run") << "result: " << Reg[EAX].i << end();
- 66     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 62     trace(91, "run") << "unlink: " << Reg[EBX].u << end();
+ 63     trace(91, "run") << Reg[EBX].u << " => " << mem_addr_string(Reg[EBX].u) << end();
+ 64     Reg[EAX].i = unlink(/*filename*/mem_addr_string(Reg[EBX].u));
+ 65     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 66     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
  67     break;
  68   case 38:
- 69     trace(91, "run") << "rename: " << Reg[EBX].u << " -> " << Reg[ECX].u << end();
- 70     trace(91, "run") << Reg[EBX].u << " => " << mem_addr_string(Reg[EBX].u) << end();
- 71     trace(91, "run") << Reg[ECX].u << " => " << mem_addr_string(Reg[ECX].u) << end();
- 72     Reg[EAX].i = rename(/*old filename*/mem_addr_string(Reg[EBX].u), /*new filename*/mem_addr_string(Reg[ECX].u));
- 73     trace(91, "run") << "result: " << Reg[EAX].i << end();
- 74     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 69     trace(91, "run") << "rename: " << Reg[EBX].u << " -> " << Reg[ECX].u << end();
+ 70     trace(91, "run") << Reg[EBX].u << " => " << mem_addr_string(Reg[EBX].u) << end();
+ 71     trace(91, "run") << Reg[ECX].u << " => " << mem_addr_string(Reg[ECX].u) << end();
+ 72     Reg[EAX].i = rename(/*old filename*/mem_addr_string(Reg[EBX].u), /*new filename*/mem_addr_string(Reg[ECX].u));
+ 73     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 74     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
  75     break;
  76   case 45:  // brk: modify size of data segment
- 77     trace(91, "run") << "grow data segment to " << Reg[EBX].u << end();
- 78     resize_mem(/*new end address*/Reg[EBX].u);
+ 77     trace(91, "run") << "grow data segment to " << Reg[EBX].u << end();
+ 78     grow_data_segment(/*new end address*/Reg[EBX].u);
  79     break;
- 80   default:
- 81     raise << HEXWORD << EIP << ": unimplemented syscall " << Reg[EAX].u << '\n' << end();
- 82   }
- 83 }
- 84 
- 85 // SubX is oblivious to file permissions, directories, symbolic links, terminals, and much else besides.
- 86 // Also ignoring any concurrency considerations for now.
- 87 void check_flags(int reg) {
- 88   uint32_t flags = Reg[reg].u;
- 89   if (flags != ((flags & O_RDONLY) | (flags & O_WRONLY))) {
- 90     cerr << HEXWORD << EIP << ": most POSIX flags to the open() syscall are not supported. Just O_RDONLY and O_WRONLY for now. Zero concurrent access support.\n";
- 91     exit(1);
- 92   }
- 93   if ((flags & O_RDONLY) && (flags & O_WRONLY)) {
- 94     cerr << HEXWORD << EIP << ": can't open a file for both reading and writing at once. See http://man7.org/linux/man-pages/man2/open.2.html.\n";
- 95     exit(1);
- 96   }
- 97 }
- 98 
- 99 void check_mode(int reg) {
-100   if (Reg[reg].u != 0600) {
-101     cerr << HEXWORD << EIP << ": SubX is oblivious to file permissions; register " << reg << " must be 0.\n";
+ 80   case 90:  // mmap: allocate memory outside existing segment allocations
+ 81     trace(91, "run") << "mmap: allocate new segment" << end();
+ 82     // Ignore most arguments for now: address hint, protection flags, sharing flags, fd, offset.
+ 83     // We only support anonymous maps.
+ 84     Reg[EAX].u = new_segment(/*length*/read_mem_u32(Reg[EBX].u+0x4));
+ 85     trace(91, "run") << "result: " << Reg[EAX].u << end();
+ 86     break;
+ 87   default:
+ 88     raise << HEXWORD << EIP << ": unimplemented syscall " << Reg[EAX].u << '\n' << end();
+ 89   }
+ 90 }
+ 91 
+ 92 // SubX is oblivious to file permissions, directories, symbolic links, terminals, and much else besides.
+ 93 // Also ignoring any concurrency considerations for now.
+ 94 void check_flags(int reg) {
+ 95   uint32_t flags = Reg[reg].u;
+ 96   if (flags != ((flags & O_RDONLY) | (flags & O_WRONLY))) {
+ 97     cerr << HEXWORD << EIP << ": most POSIX flags to the open() syscall are not supported. Just O_RDONLY and O_WRONLY for now. Zero concurrent access support.\n";
+ 98     exit(1);
+ 99   }
+100   if ((flags & O_RDONLY) && (flags & O_WRONLY)) {
+101     cerr << HEXWORD << EIP << ": can't open a file for both reading and writing at once. See http://man7.org/linux/man-pages/man2/open.2.html.\n";
 102     exit(1);
 103   }
 104 }
 105 
-106 void resize_mem(uint32_t new_end_address) {
-107   if (new_end_address < Mem_offset) {
-108     raise << HEXWORD << EIP << ": can't shrink data segment to before code segment\n";
-109     return;
+106 void check_mode(int reg) {
+107   if (Reg[reg].u != 0600) {
+108     cerr << HEXWORD << EIP << ": SubX is oblivious to file permissions; register " << reg << " must be 0.\n";
+109     exit(1);
 110   }
-111   int32_t new_size = new_end_address - Mem_offset;
-112   if (new_size < SIZE(Mem)) {
-113     raise << HEXWORD << EIP << ": shrinking data segment is not supported.\n" << end();
-114     return;
-115   }
-116   Mem.resize(new_size);  // will throw exception on failure
-117 }
+111 }
+112 
+113 :(before "End Globals")
+114 uint32_t Next_segment = 0xb0000000;  // 0xc0000000 and up is reserved for Linux kernel
+115 const uint32_t SPACE_FOR_SEGMENT = 0x01000000;
+116 :(code)
+117 uint32_t new_segment(uint32_t length) {
+118   uint32_t result = Next_segment;
+119   Mem.push_back(vma(Next_segment, Next_segment+length));
+120   Next_segment -= SPACE_FOR_SEGMENT;
+121   return result;
+122 }
 
diff --git a/html/subx/028translate.cc.html b/html/subx/028translate.cc.html index 593b925a..b83965bf 100644 --- a/html/subx/028translate.cc.html +++ b/html/subx/028translate.cc.html @@ -79,146 +79,176 @@ if ('onhashchange' in window) { 19 :(before "End Main") 20 if (is_equal(argv[1], "translate")) { 21 START_TRACING_UNTIL_END_OF_SCOPE; - 22 assert(argc > 3); - 23 reset(); - 24 program p; - 25 ifstream fin(argv[2]); - 26 if (!fin) { - 27 cerr << "could not open " << argv[2] << '\n'; - 28 return 1; - 29 } - 30 parse(fin, p); - 31 if (trace_contains_errors()) return 1; - 32 transform(p); - 33 if (trace_contains_errors()) return 1; - 34 save_elf(p, argv[3]); - 35 if (trace_contains_errors()) unlink(argv[3]); - 36 return 0; - 37 } - 38 - 39 :(code) - 40 // write out a program to a bare-bones ELF file - 41 void save_elf(const program& p, const char* filename) { - 42 ofstream out(filename, ios::binary); - 43 write_elf_header(out, p); - 44 for (size_t i = 0; i < p.segments.size(); ++i) - 45 write_segment(p.segments.at(i), out); - 46 out.close(); - 47 } - 48 - 49 void write_elf_header(ostream& out, const program& p) { - 50 char c = '\0'; - 51 #define O(X) c = (X); out.write(&c, sizeof(c)) - 52 // host is required to be little-endian - 53 #define emit(X) out.write(reinterpret_cast<const char*>(&X), sizeof(X)) - 54 //// ehdr - 55 // e_ident - 56 O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46); - 57 O(0x1); // 32-bit format - 58 O(0x1); // little-endian - 59 O(0x1); O(0x0); - 60 for (size_t i = 0; i < 8; ++i) { O(0x0); } - 61 // e_type - 62 O(0x02); O(0x00); - 63 // e_machine - 64 O(0x03); O(0x00); - 65 // e_version - 66 O(0x01); O(0x00); O(0x00); O(0x00); - 67 // e_entry - 68 int e_entry = p.segments.at(0).start; // convention - 69 emit(e_entry); - 70 // e_phoff -- immediately after ELF header - 71 int e_phoff = 0x34; - 72 emit(e_phoff); - 73 // e_shoff; unused - 74 int dummy32 = 0; - 75 emit(dummy32); - 76 // e_flags; unused - 77 emit(dummy32); - 78 // e_ehsize - 79 
uint16_t e_ehsize = 0x34; - 80 emit(e_ehsize); - 81 // e_phentsize - 82 uint16_t e_phentsize = 0x20; - 83 emit(e_phentsize); - 84 // e_phnum - 85 uint16_t e_phnum = SIZE(p.segments); - 86 emit(e_phnum); - 87 // e_shentsize - 88 uint16_t dummy16 = 0x0; - 89 emit(dummy16); - 90 // e_shnum - 91 emit(dummy16); - 92 // e_shstrndx - 93 emit(dummy16); - 94 - 95 uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/; - 96 for (int i = 0; i < SIZE(p.segments); ++i) { - 97 //// phdr - 98 // p_type - 99 uint32_t p_type = 0x1; -100 emit(p_type); -101 // p_offset -102 emit(p_offset); -103 // p_vaddr -104 uint32_t p_start = p.segments.at(i).start; -105 emit(p_start); -106 // p_paddr -107 emit(p_start); -108 // p_filesz -109 uint32_t size = num_words(p.segments.at(i)); -110 assert(p_offset + size < SEGMENT_SIZE); -111 emit(size); -112 // p_memsz -113 emit(size); -114 // p_flags -115 uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6; // convention: only first segment is code -116 emit(p_flags); -117 -118 // p_align -119 // "As the system creates or augments a process image, it logically copies -120 // a file's segment to a virtual memory segment. When—and if— the system -121 // physically reads the file depends on the program's execution behavior, -122 // system load, and so on. A process does not require a physical page -123 // unless it references the logical page during execution, and processes -124 // commonly leave many pages unreferenced. Therefore delaying physical -125 // reads frequently obviates them, improving system performance. To obtain -126 // this efficiency in practice, executable and shared object files must -127 // have segment images whose file offsets and virtual addresses are -128 // congruent, modulo the page size." 
-- http://refspecs.linuxbase.org/elf/elf.pdf (page 95) -129 uint32_t p_align = 0x1000; // default page size on linux -130 emit(p_align); -131 if (p_offset % p_align != p_start % p_align) { -132 raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end(); -133 return; -134 } -135 -136 // prepare for next segment -137 p_offset += size; -138 } -139 #undef O -140 #undef emit -141 } -142 -143 void write_segment(const segment& s, ostream& out) { -144 for (int i = 0; i < SIZE(s.lines); ++i) { -145 const vector<word>& w = s.lines.at(i).words; -146 for (int j = 0; j < SIZE(w); ++j) { -147 uint8_t x = hex_byte(w.at(j).data); // we're done with metadata by this point -148 out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1); -149 } -150 } -151 } -152 -153 uint32_t num_words(const segment& s) { -154 uint32_t sum = 0; -155 for (int i = 0; i < SIZE(s.lines); ++i) -156 sum += SIZE(s.lines.at(i).words); -157 return sum; -158 } -159 -160 :(before "End Includes") -161 using std::ios; + 22 reset(); + 23 program p; + 24 string output_filename; + 25 for (int i = /*skip 'subx translate'*/2; i < argc; ++i) { + 26 if (is_equal(argv[i], "-o")) { + 27 ++i; + 28 if (i >= argc) { + 29 print_translate_usage(); + 30 cerr << "'-o' must be followed by a filename to write results to\n"; + 31 exit(1); + 32 } + 33 output_filename = argv[i]; + 34 } + 35 else { + 36 ifstream fin(argv[i]); + 37 if (!fin) { + 38 cerr << "could not open " << argv[i] << '\n'; + 39 return 1; + 40 } + 41 parse(fin, p); + 42 if (trace_contains_errors()) return 1; + 43 } + 44 } + 45 if (p.segments.empty()) { + 46 print_translate_usage(); + 47 cerr << "nothing to do; must provide at least one file to read\n"; + 48 exit(1); + 49 } + 50 if (output_filename.empty()) { + 51 print_translate_usage(); + 52 cerr << "must provide a filename to write to using '-o'\n"; + 53 
exit(1); + 54 } + 55 transform(p); + 56 if (trace_contains_errors()) return 1; + 57 save_elf(p, output_filename); + 58 if (trace_contains_errors()) { + 59 unlink(output_filename.c_str()); + 60 return 1; + 61 } + 62 return 0; + 63 } + 64 + 65 :(code) + 66 void print_translate_usage() { + 67 cerr << "Usage: subx translate file1 file2 ... -o output\n"; + 68 } + 69 + 70 // write out a program to a bare-bones ELF file + 71 void save_elf(const program& p, const string& filename) { + 72 ofstream out(filename.c_str(), ios::binary); + 73 write_elf_header(out, p); + 74 for (size_t i = 0; i < p.segments.size(); ++i) + 75 write_segment(p.segments.at(i), out); + 76 out.close(); + 77 } + 78 + 79 void write_elf_header(ostream& out, const program& p) { + 80 char c = '\0'; + 81 #define O(X) c = (X); out.write(&c, sizeof(c)) + 82 // host is required to be little-endian + 83 #define emit(X) out.write(reinterpret_cast<const char*>(&X), sizeof(X)) + 84 //// ehdr + 85 // e_ident + 86 O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46); + 87 O(0x1); // 32-bit format + 88 O(0x1); // little-endian + 89 O(0x1); O(0x0); + 90 for (size_t i = 0; i < 8; ++i) { O(0x0); } + 91 // e_type + 92 O(0x02); O(0x00); + 93 // e_machine + 94 O(0x03); O(0x00); + 95 // e_version + 96 O(0x01); O(0x00); O(0x00); O(0x00); + 97 // e_entry + 98 int e_entry = p.segments.at(0).start; // convention + 99 emit(e_entry); +100 // e_phoff -- immediately after ELF header +101 int e_phoff = 0x34; +102 emit(e_phoff); +103 // e_shoff; unused +104 int dummy32 = 0; +105 emit(dummy32); +106 // e_flags; unused +107 emit(dummy32); +108 // e_ehsize +109 uint16_t e_ehsize = 0x34; +110 emit(e_ehsize); +111 // e_phentsize +112 uint16_t e_phentsize = 0x20; +113 emit(e_phentsize); +114 // e_phnum +115 uint16_t e_phnum = SIZE(p.segments); +116 emit(e_phnum); +117 // e_shentsize +118 uint16_t dummy16 = 0x0; +119 emit(dummy16); +120 // e_shnum +121 emit(dummy16); +122 // e_shstrndx +123 emit(dummy16); +124 +125 uint32_t p_offset = /*size of 
ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/; +126 for (int i = 0; i < SIZE(p.segments); ++i) { +127 //// phdr +128 // p_type +129 uint32_t p_type = 0x1; +130 emit(p_type); +131 // p_offset +132 emit(p_offset); +133 // p_vaddr +134 uint32_t p_start = p.segments.at(i).start; +135 emit(p_start); +136 // p_paddr +137 emit(p_start); +138 // p_filesz +139 uint32_t size = num_words(p.segments.at(i)); +140 assert(p_offset + size < SEGMENT_SIZE); +141 emit(size); +142 // p_memsz +143 emit(size); +144 // p_flags +145 uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6; // convention: only first segment is code +146 emit(p_flags); +147 +148 // p_align +149 // "As the system creates or augments a process image, it logically copies +150 // a file's segment to a virtual memory segment. When—and if— the system +151 // physically reads the file depends on the program's execution behavior, +152 // system load, and so on. A process does not require a physical page +153 // unless it references the logical page during execution, and processes +154 // commonly leave many pages unreferenced. Therefore delaying physical +155 // reads frequently obviates them, improving system performance. To obtain +156 // this efficiency in practice, executable and shared object files must +157 // have segment images whose file offsets and virtual addresses are +158 // congruent, modulo the page size." 
-- http://refspecs.linuxbase.org/elf/elf.pdf (page 95) +159 uint32_t p_align = 0x1000; // default page size on linux +160 emit(p_align); +161 if (p_offset % p_align != p_start % p_align) { +162 raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end(); +163 return; +164 } +165 +166 // prepare for next segment +167 p_offset += size; +168 } +169 #undef O +170 #undef emit +171 } +172 +173 void write_segment(const segment& s, ostream& out) { +174 for (int i = 0; i < SIZE(s.lines); ++i) { +175 const vector<word>& w = s.lines.at(i).words; +176 for (int j = 0; j < SIZE(w); ++j) { +177 uint8_t x = hex_byte(w.at(j).data); // we're done with metadata by this point +178 out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1); +179 } +180 } +181 } +182 +183 uint32_t num_words(const segment& s) { +184 uint32_t sum = 0; +185 for (int i = 0; i < SIZE(s.lines); ++i) +186 sum += SIZE(s.lines.at(i).words); +187 return sum; +188 } +189 +190 :(before "End Includes") +191 using std::ios; diff --git a/html/subx/030---operands.cc.html b/html/subx/030---operands.cc.html index 0129da0a..af7cf865 100644 --- a/html/subx/030---operands.cc.html +++ b/html/subx/030---operands.cc.html @@ -20,6 +20,7 @@ a:hover { text-decoration: underline; } .LineNr { color: #444444; } .Constant { color: #00a0a0; } .Delimiter { color: #800080; } +.Special { color: #c00000; } .SalientComment { color: #00ffff; } .Normal { color: #aaaaaa; background-color: #080808; padding-bottom: 1px; } .Comment { color: #9090ff; } @@ -198,7 +199,7 @@ if ('onhashchange' in window) { 137 138 :(after "Begin Transforms") 139 // Begin Level-2 Transforms -140 Transform.push_back(pack_operands); +140 Transform.push_back(pack_operands); 141 // End Level-2 Transforms 142 143 :(code) @@ -206,13 +207,13 @@ if ('onhashchange' in window) { 145 if (p.segments.empty()) return; 146 
segment& code = p.segments.at(0); 147 // Pack Operands(segment code) -148 trace(99, "transform") << "-- pack operands" << end(); +148 trace(99, "transform") << "-- pack operands" << end(); 149 for (int i = 0; i < SIZE(code.lines); ++i) { 150 line& inst = code.lines.at(i); -151 if (all_hex_bytes(inst)) continue; -152 trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end(); +151 if (all_hex_bytes(inst)) continue; +152 trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end(); 153 pack_operands(inst); -154 trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end(); +154 trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end(); 155 } 156 } 157 @@ -228,11 +229,11 @@ if ('onhashchange' in window) { 167 168 void add_opcodes(const line& in, line& out) { 169 out.words.push_back(in.words.at(0)); -170 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3") +170 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3") 171 out.words.push_back(in.words.at(1)); -172 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f") +172 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f") 173 out.words.push_back(in.words.at(2)); -174 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f") +174 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f") 175 out.words.push_back(in.words.at(2)); 176 } 177 @@ -242,24 +243,24 @@ if ('onhashchange' in window) { 181 for (int i = 0; i < SIZE(in.words); ++i) { 182 const word& curr = in.words.at(i); 183 if (has_operand_metadata(curr, "mod")) { -184 mod = hex_byte(curr.data); +184 mod = hex_byte(curr.data); 185 emit = true; 186 } 187 else if (has_operand_metadata(curr, "rm32")) { -188 rm32 = hex_byte(curr.data); +188 rm32 = 
hex_byte(curr.data); 189 emit = true; 190 } 191 else if (has_operand_metadata(curr, "r32")) { -192 reg_subop = hex_byte(curr.data); +192 reg_subop = hex_byte(curr.data); 193 emit = true; 194 } 195 else if (has_operand_metadata(curr, "subop")) { -196 reg_subop = hex_byte(curr.data); +196 reg_subop = hex_byte(curr.data); 197 emit = true; 198 } 199 } 200 if (emit) -201 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32)); +201 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32)); 202 } 203 204 void add_sib_byte(const line& in, line& out) { @@ -268,20 +269,20 @@ if ('onhashchange' in window) { 207 for (int i = 0; i < SIZE(in.words); ++i) { 208 const word& curr = in.words.at(i); 209 if (has_operand_metadata(curr, "scale")) { -210 scale = hex_byte(curr.data); +210 scale = hex_byte(curr.data); 211 emit = true; 212 } 213 else if (has_operand_metadata(curr, "index")) { -214 index = hex_byte(curr.data); +214 index = hex_byte(curr.data); 215 emit = true; 216 } 217 else if (has_operand_metadata(curr, "base")) { -218 base = hex_byte(curr.data); +218 base = hex_byte(curr.data); 219 emit = true; 220 } 221 } 222 if (emit) -223 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base)); +223 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base)); 224 } 225 226 void add_disp_bytes(const line& in, line& out) { @@ -308,200 +309,210 @@ if ('onhashchange' in window) { 247 248 void emit_hex_bytes(line& out, const word& w, int num) { 249 assert(num <= 4); -250 if (num == 1 || !is_hex_int(w.data)) { -251 out.words.push_back(w); -252 if (is_hex_int(w.data)) -253 out.words.back().data = hex_byte_to_string(parse_int(w.data)); -254 return; -255 } -256 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num); -257 } -258 -259 void emit_hex_bytes(line& out, uint32_t val, int num) { -260 assert(num <= 4); -261 for (int i = 0; i < num; ++i) { -262 out.words.push_back(hex_byte_text(val & 0xff)); -263 val = val >> 8; -264 
} -265 } -266 -267 word hex_byte_text(uint8_t val) { -268 word result; -269 result.data = hex_byte_to_string(val); -270 result.original = result.data+"/auto"; -271 return result; -272 } -273 -274 string hex_byte_to_string(uint8_t val) { -275 ostringstream out; -276 // uint8_t prints without padding, but int8_t will expand to 32 bits again -277 out << HEXBYTE << NUM(val); -278 return out.str(); -279 } -280 -281 string to_string(const vector<word>& in) { -282 ostringstream out; -283 for (int i = 0; i < SIZE(in); ++i) { -284 if (i > 0) out << ' '; -285 out << in.at(i).data; -286 } -287 return out.str(); -288 } -289 -290 :(before "End Unit Tests") -291 void test_preserve_metadata_when_emitting_single_byte() { -292 word in; -293 in.data = "f0"; -294 in.original = "f0/foo"; -295 line out; -296 emit_hex_bytes(out, in, 1); -297 CHECK_EQ(out.words.at(0).data, "f0"); -298 CHECK_EQ(out.words.at(0).original, "f0/foo"); -299 } -300 -301 :(scenario pack_disp8) -302 == 0x1 -303 74 2/disp8 # jump 2 bytes away if ZF is set -304 +transform: packing instruction '74 2/disp8' -305 +transform: instruction after packing: '74 02' -306 -307 :(scenarios transform) -308 :(scenario pack_disp8_negative) -309 == 0x1 -310 # running this will cause an infinite loop -311 74 -1/disp8 # jump 1 byte before if ZF is set -312 +transform: packing instruction '74 -1/disp8' -313 +transform: instruction after packing: '74 ff' -314 :(scenarios run) -315 -316 //: helper for scenario -317 :(code) -318 void transform(const string& text_bytes) { -319 program p; -320 istringstream in(text_bytes); -321 parse(in, p); -322 if (trace_contains_errors()) return; -323 transform(p); -324 } -325 -326 :(scenario pack_modrm_imm32) -327 == 0x1 -328 # instruction effective address operand displacement immediate -329 # op subop mod rm32 base index scale r32 -330 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -331 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 # add 1 to EBX -332 
+transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32' -333 +transform: instruction after packing: '81 c3 01 00 00 00' -334 -335 :(scenario pack_imm32_large) -336 == 0x1 -337 b9 0x080490a7/imm32 # copy to ECX -338 +transform: packing instruction 'b9 0x080490a7/imm32' -339 +transform: instruction after packing: 'b9 a7 90 04 08' -340 -341 :(scenario pack_immediate_constants_hex) -342 == 0x1 -343 # instruction effective address operand displacement immediate -344 # op subop mod rm32 base index scale r32 -345 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -346 bb 0x2a/imm32 # copy 42 to EBX -347 +transform: packing instruction 'bb 0x2a/imm32' -348 +transform: instruction after packing: 'bb 2a 00 00 00' -349 +run: copy imm32 0x0000002a to EBX -350 -351 :(scenarios transform) -352 :(scenario pack_silently_ignores_non_hex) -353 == 0x1 -354 # instruction effective address operand displacement immediate -355 # op subop mod rm32 base index scale r32 -356 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -357 bb foo/imm32 # copy foo to EBX -358 +transform: packing instruction 'bb foo/imm32' -359 # no change (we're just not printing metadata to the trace) -360 +transform: instruction after packing: 'bb foo' -361 $error: 0 -362 :(scenarios run) -363 -364 //:: helpers -365 -366 :(code) -367 bool all_hex_bytes(const line& inst) { -368 for (int i = 0; i < SIZE(inst.words); ++i) -369 if (!is_hex_byte(inst.words.at(i))) -370 return false; -371 return true; -372 } +250 bool is_number = looks_like_hex_int(w.data); +251 if (num == 1 || !is_number) { +252 out.words.push_back(w); // preserve existing metadata +253 if (is_number) +254 out.words.back().data = hex_byte_to_string(parse_int(w.data)); +255 return; +256 } +257 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num); +258 } +259 +260 void emit_hex_bytes(line& out, uint32_t val, int num) { +261 assert(num <= 4); 
+262 for (int i = 0; i < num; ++i) { +263 out.words.push_back(hex_byte_text(val & 0xff)); +264 val = val >> 8; +265 } +266 } +267 +268 word hex_byte_text(uint8_t val) { +269 word result; +270 result.data = hex_byte_to_string(val); +271 result.original = result.data+"/auto"; +272 return result; +273 } +274 +275 string hex_byte_to_string(uint8_t val) { +276 ostringstream out; +277 // uint8_t prints without padding, but int8_t will expand to 32 bits again +278 out << HEXBYTE << NUM(val); +279 return out.str(); +280 } +281 +282 string to_string(const vector<word>& in) { +283 ostringstream out; +284 for (int i = 0; i < SIZE(in); ++i) { +285 if (i > 0) out << ' '; +286 out << in.at(i).data; +287 } +288 return out.str(); +289 } +290 +291 :(before "End Unit Tests") +292 void test_preserve_metadata_when_emitting_single_byte() { +293 word in; +294 in.data = "f0"; +295 in.original = "f0/foo"; +296 line out; +297 emit_hex_bytes(out, in, 1); +298 CHECK_EQ(out.words.at(0).data, "f0"); +299 CHECK_EQ(out.words.at(0).original, "f0/foo"); +300 } +301 +302 :(scenario pack_disp8) +303 == 0x1 +304 74 2/disp8 # jump 2 bytes away if ZF is set +305 +transform: packing instruction '74 2/disp8' +306 +transform: instruction after packing: '74 02' +307 +308 :(scenarios transform) +309 :(scenario pack_disp8_negative) +310 == 0x1 +311 # running this will cause an infinite loop +312 74 -1/disp8 # jump 1 byte before if ZF is set +313 +transform: packing instruction '74 -1/disp8' +314 +transform: instruction after packing: '74 ff' +315 :(scenarios run) +316 +317 //: helper for scenario +318 :(code) +319 void transform(const string& text_bytes) { +320 program p; +321 istringstream in(text_bytes); +322 parse(in, p); +323 if (trace_contains_errors()) return; +324 transform(p); +325 } +326 +327 :(scenario pack_modrm_imm32) +328 == 0x1 +329 # instruction effective address operand displacement immediate +330 # op subop mod rm32 base index scale r32 +331 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 
bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +332 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 # add 1 to EBX +333 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32' +334 +transform: instruction after packing: '81 c3 01 00 00 00' +335 +336 :(scenario pack_imm32_large) +337 == 0x1 +338 b9 0x080490a7/imm32 # copy to ECX +339 +transform: packing instruction 'b9 0x080490a7/imm32' +340 +transform: instruction after packing: 'b9 a7 90 04 08' +341 +342 :(scenario pack_immediate_constants_hex) +343 == 0x1 +344 # instruction effective address operand displacement immediate +345 # op subop mod rm32 base index scale r32 +346 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +347 bb 0x2a/imm32 # copy 42 to EBX +348 +transform: packing instruction 'bb 0x2a/imm32' +349 +transform: instruction after packing: 'bb 2a 00 00 00' +350 +run: copy imm32 0x0000002a to EBX +351 +352 :(scenarios transform) +353 :(scenario pack_silently_ignores_non_hex) +354 % Hide_errors = true; +355 == 0x1 +356 # instruction effective address operand displacement immediate +357 # op subop mod rm32 base index scale r32 +358 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +359 bb foo/imm32 # copy to EBX +360 +transform: packing instruction 'bb foo/imm32' +361 # no change (we're just not printing metadata to the trace) +362 +transform: instruction after packing: 'bb foo' +363 :(scenarios run) +364 +365 :(scenario pack_flags_bad_hex) +366 % Hide_errors = true; +367 == 0x1 +368 # instruction effective address operand displacement immediate +369 # op subop mod rm32 base index scale r32 +370 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +371 bb 0xfoo/imm32 # copy to EBX +372 +error: not a number: 0xfoo 373 -374 bool is_hex_byte(const word& curr) { -375 if (contains_any_operand_metadata(curr)) -376 return false; -377 if (SIZE(curr.data) != 2) -378 return false; -379 
if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos) -380 return false; +374 //:: helpers +375 +376 :(code) +377 bool all_hex_bytes(const line& inst) { +378 for (int i = 0; i < SIZE(inst.words); ++i) +379 if (!is_hex_byte(inst.words.at(i))) +380 return false; 381 return true; 382 } 383 -384 bool contains_any_operand_metadata(const word& word) { -385 for (int i = 0; i < SIZE(word.metadata); ++i) -386 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end()) -387 return true; -388 return false; -389 } -390 -391 bool has_operand_metadata(const line& inst, const string& m) { -392 bool result = false; -393 for (int i = 0; i < SIZE(inst.words); ++i) { -394 if (!has_operand_metadata(inst.words.at(i), m)) continue; -395 if (result) { -396 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end(); -397 return false; -398 } -399 result = true; -400 } -401 return result; -402 } -403 -404 bool has_operand_metadata(const word& w, const string& m) { -405 bool result = false; -406 bool metadata_found = false; -407 for (int i = 0; i < SIZE(w.metadata); ++i) { -408 const string& curr = w.metadata.at(i); -409 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue; // ignore unrecognized metadata -410 if (metadata_found) { -411 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); -412 return false; -413 } -414 metadata_found = true; -415 result = (curr == m); -416 } -417 return result; -418 } -419 -420 word metadata(const line& inst, const string& m) { -421 for (int i = 0; i < SIZE(inst.words); ++i) -422 if (has_operand_metadata(inst.words.at(i), m)) -423 return inst.words.at(i); -424 assert(false); -425 } -426 -427 bool is_hex_int(const string& s) { -428 if (s.empty()) return false; -429 size_t pos = 0; -430 if (s.at(0) == '-' || s.at(0) == '+') pos++; -431 if (s.substr(pos, pos+2) == "0x") pos += 2; -432 return 
s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos; -433 } -434 -435 :(code) -436 string to_string(const line& inst) { -437 ostringstream out; -438 for (int i = 0; i < SIZE(inst.words); ++i) { -439 if (i > 0) out << ' '; -440 out << inst.words.at(i).original; -441 } -442 return out.str(); +384 bool is_hex_byte(const word& curr) { +385 if (contains_any_operand_metadata(curr)) +386 return false; +387 if (SIZE(curr.data) != 2) +388 return false; +389 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos) +390 return false; +391 return true; +392 } +393 +394 bool contains_any_operand_metadata(const word& word) { +395 for (int i = 0; i < SIZE(word.metadata); ++i) +396 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end()) +397 return true; +398 return false; +399 } +400 +401 bool has_operand_metadata(const line& inst, const string& m) { +402 bool result = false; +403 for (int i = 0; i < SIZE(inst.words); ++i) { +404 if (!has_operand_metadata(inst.words.at(i), m)) continue; +405 if (result) { +406 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end(); +407 return false; +408 } +409 result = true; +410 } +411 return result; +412 } +413 +414 bool has_operand_metadata(const word& w, const string& m) { +415 bool result = false; +416 bool metadata_found = false; +417 for (int i = 0; i < SIZE(w.metadata); ++i) { +418 const string& curr = w.metadata.at(i); +419 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue; // ignore unrecognized metadata +420 if (metadata_found) { +421 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); +422 return false; +423 } +424 metadata_found = true; +425 result = (curr == m); +426 } +427 return result; +428 } +429 +430 word metadata(const line& inst, const string& m) { +431 for (int i = 0; i < SIZE(inst.words); ++i) +432 if (has_operand_metadata(inst.words.at(i), m)) +433 return 
inst.words.at(i); +434 assert(false); +435 } +436 +437 bool looks_like_hex_int(const string& s) { +438 if (s.empty()) return false; +439 if (s.at(0) == '-' || s.at(0) == '+') return true; +440 if (isdigit(s.at(0))) return true; // includes '0x' prefix +441 // End looks_like_hex_int(s) Detectors +442 return false; 443 } +444 +445 :(code) +446 string to_string(const line& inst) { +447 ostringstream out; +448 for (int i = 0; i < SIZE(inst.words); ++i) { +449 if (i > 0) out << ' '; +450 out << inst.words.at(i).original; +451 } +452 return out.str(); +453 } diff --git a/html/subx/031check_operands.cc.html b/html/subx/031check_operands.cc.html index d53cc447..34ce47b6 100644 --- a/html/subx/031check_operands.cc.html +++ b/html/subx/031check_operands.cc.html @@ -80,7 +80,7 @@ if ('onhashchange' in window) { 16 17 :(code) 18 void check_operands(const segment& code) { - 19 trace(99, "transform") << "-- check operands" << end(); + 19 trace(99, "transform") << "-- check operands" << end(); 20 for (int i = 0; i < SIZE(code.lines); ++i) { 21 check_operands(code.lines.at(i)); 22 if (trace_contains_errors()) return; // stop at the first mal-formed instruction @@ -89,11 +89,11 @@ if ('onhashchange' in window) { 25 26 void check_operands(const line& inst) { 27 word op = preprocess_op(inst.words.at(0)); - 28 if (op.data == "0f") { + 28 if (op.data == "0f") { 29 check_operands_0f(inst); 30 return; 31 } - 32 if (op.data == "f3") { + 32 if (op.data == "f3") { 33 check_operands_f3(inst); 34 return; 35 } @@ -101,19 +101,19 @@ if ('onhashchange' in window) { 37 } 38 39 word preprocess_op(word/*copy*/ op) { - 40 op.data = tolower(op.data.c_str()); + 40 op.data = tolower(op.data.c_str()); 41 // opcodes can't be negative - 42 if (starts_with(op.data, "0x")) - 43 op.data = op.data.substr(2); - 44 if (SIZE(op.data) == 1) - 45 op.data = string("0")+op.data; + 42 if (starts_with(op.data, "0x")) + 43 op.data = op.data.substr(2); + 44 if (SIZE(op.data) == 1) + 45 op.data = string("0")+op.data; 46 
return op; 47 } 48 49 void test_preprocess_op() { - 50 word w1; w1.data = "0xf"; - 51 word w2; w2.data = "0f"; - 52 CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data); + 50 word w1; w1.data = "0xf"; + 51 word w2; w2.data = "0f"; + 52 CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data); 53 } 54 55 //: To check the operands for an opcode, we'll track the permitted operands @@ -309,7 +309,7 @@ if ('onhashchange' in window) { 245 246 void check_operands(const line& inst, const word& op) { 247 if (!is_hex_byte(op)) return; -248 uint8_t expected_bitvector = get(Permitted_operands, op.data); +248 uint8_t expected_bitvector = get(Permitted_operands, op.data); 249 if (HAS(expected_bitvector, MODRM)) { 250 check_operands_modrm(inst, op); 251 compare_bitvector_modrm(inst, expected_bitvector, op); @@ -322,7 +322,7 @@ if ('onhashchange' in window) { 258 //: Many instructions can be checked just by comparing bitvectors. 259 260 void compare_bitvector(const line& inst, uint8_t expected, const word& op) { -261 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere +261 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere 262 uint8_t bitvector = compute_operand_bitvector(inst); 263 if (trace_contains_errors()) return; // duplicate operand type 264 if (bitvector == expected) return; // all good with this instruction @@ -331,9 +331,9 @@ if ('onhashchange' in window) { 267 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand 268 const string& optype = Operand_type_name.at(i); 269 if ((bitvector & 0x1) > (expected & 0x1)) -270 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end(); +270 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end(); 271 else -272 raise << "'" << to_string(inst) << 
"'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); +272 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); 273 // continue giving all errors for a single instruction 274 } 275 // ignore settings in any unused bits @@ -341,8 +341,8 @@ if ('onhashchange' in window) { 277 278 string maybe_name(const word& op) { 279 if (!is_hex_byte(op)) return ""; -280 if (!contains_key(name, op.data)) return ""; -281 return " ("+get(name, op.data)+')'; +280 if (!contains_key(name, op.data)) return ""; +281 return " ("+get(name, op.data)+')'; 282 } 283 284 uint32_t compute_operand_bitvector(const line& inst) { @@ -359,9 +359,9 @@ if ('onhashchange' in window) { 295 } 296 297 int first_operand(const line& inst) { -298 if (inst.words.at(0).data == "0f") return 2; -299 if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") { -300 if (inst.words.at(1).data == "0f") +298 if (inst.words.at(0).data == "0f") return 2; +299 if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") { +300 if (inst.words.at(1).data == "0f") 301 return 3; 302 else 303 return 2; @@ -378,7 +378,7 @@ if ('onhashchange' in window) { 314 const string& curr = w.metadata.at(i); 315 if (!contains_key(Operand_type, curr)) continue; // ignore unrecognized metadata 316 if (found) { -317 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); +317 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); 318 return INVALID_OPERANDS; 319 } 320 bv = (1 << get(Operand_type, curr)); @@ -404,18 +404,18 @@ if ('onhashchange' in window) { 340 341 :(code) 342 void check_operands_modrm(const line& inst, const word& op) { -343 if (all_hex_bytes(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere +343 if (all_hex_bytes(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere 344 
check_operand_metadata_present(inst, "mod", op); 345 check_operand_metadata_present(inst, "rm32", op); 346 // no check for r32; some instructions don't use it; just assume it's 0 if missing -347 if (op.data == "81" || op.data == "8f" || op.data == "ff") { // keep sync'd with 'help subop' +347 if (op.data == "81" || op.data == "8f" || op.data == "ff") { // keep sync'd with 'help subop' 348 check_operand_metadata_present(inst, "subop", op); 349 check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop"); 350 } 351 if (trace_contains_errors()) return; -352 if (metadata(inst, "rm32").data != "4") return; +352 if (metadata(inst, "rm32").data != "4") return; 353 // SIB byte checks -354 uint8_t mod = hex_byte(metadata(inst, "mod").data); +354 uint8_t mod = hex_byte(metadata(inst, "mod").data); 355 if (mod != /*direct*/3) { 356 check_operand_metadata_present(inst, "base", op); 357 check_operand_metadata_present(inst, "index", op); // otherwise why go to SIB? @@ -431,7 +431,7 @@ if ('onhashchange' in window) { 367 // exception 1: ignore modrm bit since we already checked it above 368 // exception 2: modrm instructions can use a displacement on occasion 369 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) { -370 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere +370 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere 371 uint8_t bitvector = compute_operand_bitvector(inst); 372 if (trace_contains_errors()) return; // duplicate operand type 373 expected = CLEAR(expected, MODRM); // exception 1 @@ -442,9 +442,9 @@ if ('onhashchange' in window) { 378 if (i == DISP8 || i == DISP32) continue; // exception 2 379 const string& optype = Operand_type_name.at(i); 380 if ((bitvector & 0x1) > (expected & 0x1)) -381 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " 
<< optype << " operand\n" << end(); +381 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end(); 382 else -383 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); +383 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); 384 // continue giving all errors for a single instruction 385 } 386 // ignore settings in any unused bits @@ -452,12 +452,12 @@ if ('onhashchange' in window) { 388 389 void check_operand_metadata_present(const line& inst, const string& type, const word& op) { 390 if (!has_operand_metadata(inst, type)) -391 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end(); +391 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end(); 392 } 393 394 void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) { 395 if (has_operand_metadata(inst, type)) -396 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end(); +396 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end(); 397 } 398 399 :(scenarios transform) @@ -520,21 +520,21 @@ if ('onhashchange' in window) { 456 457 :(code) 458 void check_operands_0f(const line& inst) { -459 assert(inst.words.at(0).data == "0f"); +459 assert(inst.words.at(0).data == "0f"); 460 if (SIZE(inst.words) == 1) { -461 raise << "opcode '0f' requires a second opcode\n" << end(); +461 raise << "opcode '0f' requires a second opcode\n" << end(); 462 return; 463 } 464 word op = preprocess_op(inst.words.at(1)); -465 if (!contains_key(name_0f, op.data)) { -466 raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end(); +465 if (!contains_key(name_0f, 
op.data)) { +466 raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end(); 467 return; 468 } 469 check_operands_0f(inst, op); 470 } 471 472 void check_operands_f3(const line& /*unused*/) { -473 raise << "no supported opcodes starting with f3\n" << end(); +473 raise << "no supported opcodes starting with f3\n" << end(); 474 } 475 476 :(scenario check_missing_disp16_operand) @@ -566,14 +566,14 @@ if ('onhashchange' in window) { 502 503 :(code) 504 void check_operands_0f(const line& inst, const word& op) { -505 uint8_t expected_bitvector = get(Permitted_operands_0f, op.data); +505 uint8_t expected_bitvector = get(Permitted_operands_0f, op.data); 506 if (HAS(expected_bitvector, MODRM)) 507 check_operands_modrm(inst, op); 508 compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op); 509 } 510 511 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) { -512 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere +512 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere 513 uint8_t bitvector = compute_operand_bitvector(inst); 514 if (trace_contains_errors()) return; // duplicate operand type 515 if (bitvector == expected) return; // all good with this instruction @@ -582,9 +582,9 @@ if ('onhashchange' in window) { 518 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand 519 const string& optype = Operand_type_name.at(i); 520 if ((bitvector & 0x1) > (expected & 0x1)) -521 raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end(); +521 raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end(); 522 else -523 raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end(); +523 raise << 
"'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end(); 524 // continue giving all errors for a single instruction 525 } 526 // ignore settings in any unused bits diff --git a/html/subx/032check_operand_bounds.cc.html b/html/subx/032check_operand_bounds.cc.html index df91508e..757446f3 100644 --- a/html/subx/032check_operand_bounds.cc.html +++ b/html/subx/032check_operand_bounds.cc.html @@ -90,7 +90,7 @@ if ('onhashchange' in window) { 27 if (trace_contains_errors()) return; 28 :(code) 29 void check_operand_bounds(const segment& code) { -30 trace(99, "transform") << "-- check operand bounds" << end(); +30 trace(99, "transform") << "-- check operand bounds" << end(); 31 for (int i = 0; i < SIZE(code.lines); ++i) { 32 const line& inst = code.lines.at(i); 33 for (int j = first_operand(inst); j < SIZE(inst.words); ++j) @@ -100,18 +100,18 @@ if ('onhashchange' in window) { 37 } 38 39 void check_operand_bounds(const word& w) { -40 for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) { +40 for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) { 41 if (!has_operand_metadata(w, p->first)) continue; -42 if (!is_hex_int(w.data)) continue; // later transforms are on their own to do their own bounds checking -43 int32_t x = parse_int(w.data); +42 if (!looks_like_hex_int(w.data)) continue; // later transforms are on their own to do their own bounds checking +43 int32_t x = parse_int(w.data); 44 if (x >= 0) { 45 if (static_cast<uint32_t>(x) >= p->second) -46 raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end(); +46 raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end(); 47 } 48 else { 49 // hacky? 
assuming bound is a power of 2 50 if (x < -1*static_cast<int32_t>(p->second/2)) -51 raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end(); +51 raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end(); 52 } 53 } 54 } diff --git a/html/subx/034compute_segment_address.cc.html b/html/subx/034compute_segment_address.cc.html index d5111bef..e8820050 100644 --- a/html/subx/034compute_segment_address.cc.html +++ b/html/subx/034compute_segment_address.cc.html @@ -15,11 +15,11 @@ body { font-size: 12pt; font-family: monospace; color: #aaaaaa; background-color a { color:#eeeeee; text-decoration: none; } a:hover { text-decoration: underline; } * { font-size: 12pt; font-size: 1em; } +.cSpecial { color: #008000; } .traceContains { color: #008000; } .LineNr { color: #444444; } .Constant { color: #00a0a0; } .Delimiter { color: #800080; } -.Special { color: #c00000; } .Identifier { color: #c0a020; } .Normal { color: #aaaaaa; background-color: #080808; padding-bottom: 1px; } .Comment { color: #9090ff; } @@ -59,77 +59,144 @@ if ('onhashchange' in window) {
- 1 //: Start allowing us to not specify precise addresses for the start of each
- 2 //: segment.
- 3 //: This gives up a measure of control in placing code and data.
- 4 
- 5 //: segment address computation requires setting Mem_offset in test mode to what it'll be in run mode
- 6 :(scenario segment_name)
- 7 % Mem_offset = CODE_START;
- 8 == code
- 9 05/add 0x0d0c0b0a/imm32  # add 0x0d0c0b0a to EAX
-10 # code starts at 0x08048000 + p_offset, which is 0x54 for a single-segment binary
-11 +load: 0x08048054 -> 05
-12 +load: 0x08048055 -> 0a
-13 +load: 0x08048056 -> 0b
-14 +load: 0x08048057 -> 0c
-15 +load: 0x08048058 -> 0d
-16 +run: add imm32 0x0d0c0b0a to reg EAX
-17 +run: storing 0x0d0c0b0a
-18 
-19 :(before "End Level-2 Transforms")
-20 Transform.push_back(compute_segment_starts);
-21 
-22 :(code)
-23 void compute_segment_starts(program& p) {
-24   trace(99, "transform") << "-- compute segment addresses" << end();
-25   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
-26   for (size_t i = 0;  i < p.segments.size();  ++i) {
-27     segment& curr = p.segments.at(i);
-28     if (curr.start == 0) {
-29       curr.start = CODE_START + i*SEGMENT_SIZE + p_offset;
-30       trace(99, "transform") << "segment " << i << " begins at address 0x" << HEXWORD << curr.start << end();
-31     }
-32     p_offset += size_of(curr);
-33     assert(p_offset < SEGMENT_SIZE);  // for now we get less and less available space in each successive segment
-34   }
-35 }
-36 
-37 uint32_t size_of(const segment& s) {
-38   uint32_t sum = 0;
-39   for (int i = 0;  i < SIZE(s.lines);  ++i)
-40     sum += num_bytes(s.lines.at(i));
-41   return sum;
-42 }
-43 
-44 // Assumes all bitfields are packed.
-45 uint32_t num_bytes(const line& inst) {
-46   uint32_t sum = 0;
-47   for (int i = 0;  i < SIZE(inst.words);  ++i) {
-48     const word& curr = inst.words.at(i);
-49     if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32"))  // only multi-byte operands
-50       sum += 4;
-51     // End num_bytes(curr) Special-cases
-52     else
-53       sum++;
-54   }
-55   return sum;
-56 }
-57 
-58 //: Dependencies:
-59 //: - We'd like to compute segment addresses before setting up global variables,
-60 //:   because computing addresses for global variables requires knowing where
-61 //:   the data segment starts.
-62 //: - We'd like to finish expanding labels before computing segment addresses,
-63 //:   because it would make computing the sizes of segments more self-contained
-64 //:   (num_bytes).
-65 //:
-66 //: Decision: compute segment addresses before expanding labels, by being
-67 //: aware in this layer of certain operand types that will eventually occupy
-68 //: multiple bytes.
-69 //:
-70 //: The layer to expand labels later hooks into num_bytes() to teach this
-71 //: layer that labels occupy zero space in the binary.
+  1 //: Start allowing us to not specify precise addresses for the start of each
+  2 //: segment.
+  3 //: This gives up a measure of control in placing code and data.
+  4 
+  5 :(scenario segment_name)
+  6 == code
+  7 05/add 0x0d0c0b0a/imm32  # add 0x0d0c0b0a to EAX
+  8 # code starts at 0x08048000 + p_offset, which is 0x54 for a single-segment binary
+  9 +load: 0x08048054 -> 05
+ 10 +load: 0x08048055 -> 0a
+ 11 +load: 0x08048056 -> 0b
+ 12 +load: 0x08048057 -> 0c
+ 13 +load: 0x08048058 -> 0d
+ 14 +run: add imm32 0x0d0c0b0a to reg EAX
+ 15 +run: storing 0x0d0c0b0a
+ 16 
+ 17 //: Update the parser to handle non-numeric segment name.
+ 18 //:
+ 19 //: We'll also support repeated segments with non-numeric names.
+ 20 //: When we encounter a new reference to an existing segment we'll *prepend*
+ 21 //: the new data to existing data for the segment.
+ 22 
+ 23 :(before "End Globals")
+ 24 map</*name*/string, int> Segment_index;
+ 25 bool Currently_parsing_named_segment = false;  // global to permit cross-layer communication
+ 26 int Currently_parsing_segment_index = -1;  // global to permit cross-layer communication
+ 27 :(before "End Reset")
+ 28 Segment_index.clear();
+ 29 Currently_parsing_named_segment = false;
+ 30 Currently_parsing_segment_index = -1;
+ 31 
+ 32 :(before "End Segment Parsing Special-cases(segment_title)")
+ 33 if (!starts_with(segment_title, "0x")) {
+ 34   Currently_parsing_named_segment = true;
+ 35   if (!contains_key(Segment_index, segment_title)) {
+ 36     trace(99, "parse") << "new segment '" << segment_title << "'" << end();
+ 37     if (segment_title == "code")
+ 38       put(Segment_index, segment_title, 0);
+ 39     else if (segment_title == "data")
+ 40       put(Segment_index, segment_title, 1);
+ 41     else
+ 42       put(Segment_index, segment_title, max(2, SIZE(out.segments)));
+ 43     out.segments.push_back(segment());
+ 44   }
+ 45   else {
+ 46     trace(99, "parse") << "prepending to segment '" << segment_title << "'" << end();
+ 47   }
+ 48   Currently_parsing_segment_index = get(Segment_index, segment_title);
+ 49 }
+ 50 
+ 51 :(before "End flush(p, lines) Special-cases")
+ 52 if (Currently_parsing_named_segment) {
+ 53   if (p.segments.empty() || Currently_parsing_segment_index < 0) {
+ 54     raise << "input does not start with a '==' section header\n" << end();
+ 55     return;
+ 56   }
+ 57   trace(99, "parse") << "flushing to segment" << end();
+ 58   vector<line>& curr_segment_data = p.segments.at(Currently_parsing_segment_index).lines;
+ 59   curr_segment_data.insert(curr_segment_data.begin(), lines.begin(), lines.end());
+ 60   lines.clear();
+ 61   Currently_parsing_named_segment = false;
+ 62   Currently_parsing_segment_index = -1;
+ 63   return;
+ 64 }
+ 65 
+ 66 :(scenario repeated_segment_merges_data)
+ 67 == code
+ 68 05/add 0x0d0c0b0a/imm32  # add 0x0d0c0b0a to EAX
+ 69 == code
+ 70 2d/subtract 0xddccbbaa/imm32  # subtract 0xddccbbaa from EAX
+ 71 +parse: new segment 'code'
+ 72 +parse: prepending to segment 'code'
+ 73 +load: 0x08048054 -> 2d
+ 74 +load: 0x08048055 -> aa
+ 75 +load: 0x08048056 -> bb
+ 76 +load: 0x08048057 -> cc
+ 77 +load: 0x08048058 -> dd
+ 78 +load: 0x08048059 -> 05
+ 79 +load: 0x0804805a -> 0a
+ 80 +load: 0x0804805b -> 0b
+ 81 +load: 0x0804805c -> 0c
+ 82 +load: 0x0804805d -> 0d
+ 83 
+ 84 //: compute segment address
+ 85 
+ 86 :(before "End Level-2 Transforms")
+ 87 Transform.push_back(compute_segment_starts);
+ 88 
+ 89 :(code)
+ 90 void compute_segment_starts(program& p) {
+ 91   trace(99, "transform") << "-- compute segment addresses" << end();
+ 92   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
+ 93   for (size_t i = 0;  i < p.segments.size();  ++i) {
+ 94     segment& curr = p.segments.at(i);
+ 95     if (curr.start == 0) {
+ 96       curr.start = CODE_START + i*SEGMENT_SIZE + p_offset;
+ 97       trace(99, "transform") << "segment " << i << " begins at address 0x" << HEXWORD << curr.start << end();
+ 98     }
+ 99     p_offset += size_of(curr);
+100     assert(p_offset < SEGMENT_SIZE);  // for now we get less and less available space in each successive segment
+101   }
+102 }
+103 
+104 uint32_t size_of(const segment& s) {
+105   uint32_t sum = 0;
+106   for (int i = 0;  i < SIZE(s.lines);  ++i)
+107     sum += num_bytes(s.lines.at(i));
+108   return sum;
+109 }
+110 
+111 // Assumes all bitfields are packed.
+112 uint32_t num_bytes(const line& inst) {
+113   uint32_t sum = 0;
+114   for (int i = 0;  i < SIZE(inst.words);  ++i) {
+115     const word& curr = inst.words.at(i);
+116     if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32"))  // only multi-byte operands
+117       sum += 4;
+118     // End num_bytes(curr) Special-cases
+119     else
+120       sum++;
+121   }
+122   return sum;
+123 }
+124 
+125 //: Dependencies:
+126 //: - We'd like to compute segment addresses before setting up global variables,
+127 //:   because computing addresses for global variables requires knowing where
+128 //:   the data segment starts.
+129 //: - We'd like to finish expanding labels before computing segment addresses,
+130 //:   because it would make computing the sizes of segments more self-contained
+131 //:   (num_bytes).
+132 //:
+133 //: Decision: compute segment addresses before expanding labels, by being
+134 //: aware in this layer of certain operand types that will eventually occupy
+135 //: multiple bytes.
+136 //:
+137 //: The layer to expand labels later hooks into num_bytes() to teach this
+138 //: layer that labels occupy zero space in the binary.
 
diff --git a/html/subx/035labels.cc.html b/html/subx/035labels.cc.html index 918c14c7..8774406e 100644 --- a/html/subx/035labels.cc.html +++ b/html/subx/035labels.cc.html @@ -15,7 +15,7 @@ body { font-size: 12pt; font-family: monospace; color: #aaaaaa; background-color a { color:#eeeeee; text-decoration: none; } a:hover { text-decoration: underline; } * { font-size: 12pt; font-size: 1em; } -.traceContains { color: #008000; } +.cSpecial { color: #008000; } .PreProc { color: #800080; } .LineNr { color: #444444; } .Constant { color: #00a0a0; } @@ -25,7 +25,7 @@ a:hover { text-decoration: underline; } .Normal { color: #aaaaaa; background-color: #080808; padding-bottom: 1px; } .Comment { color: #9090ff; } .Comment a { color:#0000ee; text-decoration:underline; } -.cSpecial { color: #008000; } +.traceContains { color: #008000; } --> @@ -80,231 +80,262 @@ if ('onhashchange' in window) { 18 //: be a single character long. 'a' is not a hex number, it's a variable. 19 //: Later layers may add more conventions partitioning the space of names. But 20 //: the above rules will remain inviolate. 
- 21 void check_valid_name(const string& s) { - 22 if (s.empty()) { - 23 raise << "empty name!\n" << end(); - 24 return; - 25 } - 26 if (s.at(0) == '-') - 27 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end(); - 28 if (s.substr(0, 2) == "0x") { - 29 raise << "'" << s << "' looks like a hex number; use a different name\n" << end(); - 30 return; - 31 } - 32 if (isdigit(s.at(0))) - 33 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end(); - 34 if (SIZE(s) == 2) - 35 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end(); - 36 } - 37 - 38 :(scenarios transform) - 39 :(scenario map_label) - 40 == 0x1 - 41 # instruction effective address operand displacement immediate - 42 # op subop mod rm32 base index scale r32 - 43 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes - 44 loop: - 45 05 0x0d0c0b0a/imm32 # add to EAX - 46 +transform: label 'loop' is at address 1 - 47 - 48 :(before "End Level-2 Transforms") - 49 Transform.push_back(rewrite_labels); - 50 :(code) - 51 void rewrite_labels(program& p) { - 52 trace(99, "transform") << "-- rewrite labels" << end(); - 53 if (p.segments.empty()) return; - 54 segment& code = p.segments.at(0); - 55 map<string, int32_t> byte_index; // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits - 56 compute_byte_indices_for_labels(code, byte_index); - 57 if (trace_contains_errors()) return; - 58 drop_labels(code); - 59 if (trace_contains_errors()) return; - 60 replace_labels_with_displacements(code, byte_index); - 61 } - 62 - 63 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) { - 64 int current_byte = 0; - 65 for (int i = 0; i < SIZE(code.lines); ++i) { - 66 const line& inst = code.lines.at(i); - 67 for (int j 
= 0; j < SIZE(inst.words); ++j) { - 68 const word& curr = inst.words.at(j); - 69 // hack: if we have any operand metadata left after previous transforms, - 70 // deduce its size - 71 // Maybe we should just move this transform to before instruction - 72 // packing, and deduce the size of *all* operands. But then we'll also - 73 // have to deal with bitfields. - 74 if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) { - 75 if (*curr.data.rbegin() == ':') - 76 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end(); - 77 current_byte += 4; - 78 } - 79 // automatically handle /disp8 and /imm8 here - 80 else if (*curr.data.rbegin() != ':') { - 81 ++current_byte; - 82 } - 83 else { - 84 string label = drop_last(curr.data); - 85 // ensure labels look sufficiently different from raw hex - 86 check_valid_name(label); - 87 if (trace_contains_errors()) return; - 88 if (contains_any_operand_metadata(curr)) - 89 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end(); - 90 if (j > 0) - 91 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end(); - 92 if (Dump_map) - 93 cerr << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n'; - 94 put(byte_index, label, current_byte); - 95 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end(); - 96 // no modifying current_byte; label definitions won't be in the final binary - 97 } - 98 } - 99 } -100 } -101 -102 :(before "End Globals") -103 bool Dump_map = false; // currently used only by 'subx translate' -104 :(before "End Commandline Options") -105 else if (is_equal(*arg, "--map")) { -106 Dump_map = true; -107 } -108 -109 :(code) -110 void drop_labels(segment& code) { -111 for (int i = 0; i < SIZE(code.lines); ++i) { -112 line& inst = code.lines.at(i); -113 vector<word>::iterator new_end = remove_if(inst.words.begin(), 
inst.words.end(), is_label); -114 inst.words.erase(new_end, inst.words.end()); -115 } -116 } -117 -118 bool is_label(const word& w) { -119 return *w.data.rbegin() == ':'; -120 } -121 -122 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) { -123 int32_t byte_index_next_instruction_starts_at = 0; -124 for (int i = 0; i < SIZE(code.lines); ++i) { -125 line& inst = code.lines.at(i); -126 byte_index_next_instruction_starts_at += num_bytes(inst); -127 line new_inst; -128 for (int j = 0; j < SIZE(inst.words); ++j) { -129 const word& curr = inst.words.at(j); -130 if (contains_key(byte_index, curr.data)) { -131 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at; -132 if (has_operand_metadata(curr, "disp8")) { -133 if (displacement > 0xff || displacement < -0x7f) -134 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end(); -135 else -136 emit_hex_bytes(new_inst, displacement, 1); -137 } -138 else if (has_operand_metadata(curr, "disp16")) { -139 if (displacement > 0xffff || displacement < -0x7fff) -140 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end(); -141 else -142 emit_hex_bytes(new_inst, displacement, 2); -143 } -144 else if (has_operand_metadata(curr, "disp32")) { -145 emit_hex_bytes(new_inst, displacement, 4); -146 } -147 } -148 else { -149 new_inst.words.push_back(curr); -150 } -151 } -152 inst.words.swap(new_inst.words); -153 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); -154 } -155 } -156 -157 string data_to_string(const line& inst) { -158 ostringstream out; -159 for (int i = 0; i < SIZE(inst.words); ++i) { -160 if (i > 0) out << ' '; -161 out << inst.words.at(i).data; -162 } -163 return out.str(); -164 } -165 -166 string drop_last(const 
string& s) { -167 return string(s.begin(), --s.end()); -168 } -169 -170 //: Label definitions must be the first word on a line. No jumping inside -171 //: instructions. -172 //: They should also be the only word on a line. -173 //: However, you can absolutely have multiple labels map to the same address, -174 //: as long as they're on separate lines. -175 -176 :(scenario multiple_labels_at) -177 == 0x1 -178 # instruction effective address operand displacement immediate -179 # op subop mod rm32 base index scale r32 -180 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -181 # address 1 -182 loop: -183 $loop2: -184 # address 1 (labels take up no space) -185 05 0x0d0c0b0a/imm32 # add to EAX -186 # address 6 -187 eb $loop2/disp8 -188 # address 8 -189 eb $loop3/disp8 -190 # address 0xa -191 $loop3: -192 +transform: label 'loop' is at address 1 -193 +transform: label '$loop2' is at address 1 -194 +transform: label '$loop3' is at address a -195 # first jump is to -7 -196 +transform: instruction after transform: 'eb f9' -197 # second jump is to 0 (fall through) -198 +transform: instruction after transform: 'eb 00' -199 -200 :(scenario label_too_short) -201 % Hide_errors = true; -202 == 0x1 -203 # instruction effective address operand displacement immediate -204 # op subop mod rm32 base index scale r32 -205 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -206 xz: -207 05 0x0d0c0b0a/imm32 # add to EAX -208 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name -209 -210 :(scenario label_hex) -211 % Hide_errors = true; -212 == 0x1 -213 # instruction effective address operand displacement immediate -214 # op subop mod rm32 base index scale r32 -215 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -216 0xab: + 21 + 22 :(before "End looks_like_hex_int(s) Detectors") + 23 if (SIZE(s) == 2) return true; + 24 + 25 
:(scenarios transform) + 26 :(scenario pack_immediate_ignores_single_byte_nondigit_operand) + 27 % Hide_errors = true; + 28 == 0x1 + 29 b9/copy a/imm32 # copy to ECX + 30 +transform: packing instruction 'b9/copy a/imm32' + 31 # no change (we're just not printing metadata to the trace) + 32 +transform: instruction after packing: 'b9 a' + 33 + 34 :(scenario pack_immediate_ignores_3_hex_digit_operand) + 35 % Hide_errors = true; + 36 == 0x1 + 37 b9/copy aaa/imm32 # copy to ECX + 38 +transform: packing instruction 'b9/copy aaa/imm32' + 39 # no change (we're just not printing metadata to the trace) + 40 +transform: instruction after packing: 'b9 aaa' + 41 + 42 :(scenario pack_immediate_ignores_non_hex_operand) + 43 % Hide_errors = true; + 44 == 0x1 + 45 b9/copy xxx/imm32 # copy to ECX + 46 +transform: packing instruction 'b9/copy xxx/imm32' + 47 # no change (we're just not printing metadata to the trace) + 48 +transform: instruction after packing: 'b9 xxx' + 49 + 50 //: a helper we'll find handy later + 51 :(code) + 52 void check_valid_name(const string& s) { + 53 if (s.empty()) { + 54 raise << "empty name!\n" << end(); + 55 return; + 56 } + 57 if (s.at(0) == '-') + 58 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end(); + 59 if (s.substr(0, 2) == "0x") { + 60 raise << "'" << s << "' looks like a hex number; use a different name\n" << end(); + 61 return; + 62 } + 63 if (isdigit(s.at(0))) + 64 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end(); + 65 if (SIZE(s) == 2) + 66 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end(); + 67 } + 68 + 69 //: Now that that's done, let's start using names as labels. 
+ 70 + 71 :(scenario map_label) + 72 == 0x1 + 73 # instruction effective address operand displacement immediate + 74 # op subop mod rm32 base index scale r32 + 75 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes + 76 loop: + 77 05 0x0d0c0b0a/imm32 # add to EAX + 78 +transform: label 'loop' is at address 1 + 79 + 80 :(before "End Level-2 Transforms") + 81 Transform.push_back(rewrite_labels); + 82 :(code) + 83 void rewrite_labels(program& p) { + 84 trace(99, "transform") << "-- rewrite labels" << end(); + 85 if (p.segments.empty()) return; + 86 segment& code = p.segments.at(0); + 87 map<string, int32_t> byte_index; // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits + 88 compute_byte_indices_for_labels(code, byte_index); + 89 if (trace_contains_errors()) return; + 90 drop_labels(code); + 91 if (trace_contains_errors()) return; + 92 replace_labels_with_displacements(code, byte_index); + 93 } + 94 + 95 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) { + 96 int current_byte = 0; + 97 for (int i = 0; i < SIZE(code.lines); ++i) { + 98 const line& inst = code.lines.at(i); + 99 for (int j = 0; j < SIZE(inst.words); ++j) { +100 const word& curr = inst.words.at(j); +101 // hack: if we have any operand metadata left after previous transforms, +102 // deduce its size +103 // Maybe we should just move this transform to before instruction +104 // packing, and deduce the size of *all* operands. But then we'll also +105 // have to deal with bitfields. 
+106 if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) { +107 if (*curr.data.rbegin() == ':') +108 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end(); +109 current_byte += 4; +110 } +111 // automatically handle /disp8 and /imm8 here +112 else if (*curr.data.rbegin() != ':') { +113 ++current_byte; +114 } +115 else { +116 string label = drop_last(curr.data); +117 // ensure labels look sufficiently different from raw hex +118 check_valid_name(label); +119 if (trace_contains_errors()) return; +120 if (contains_any_operand_metadata(curr)) +121 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end(); +122 if (j > 0) +123 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end(); +124 if (Dump_map) +125 cerr << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n'; +126 put(byte_index, label, current_byte); +127 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end(); +128 // no modifying current_byte; label definitions won't be in the final binary +129 } +130 } +131 } +132 } +133 +134 :(before "End Globals") +135 bool Dump_map = false; // currently used only by 'subx translate' +136 :(before "End Commandline Options") +137 else if (is_equal(*arg, "--map")) { +138 Dump_map = true; +139 } +140 +141 :(code) +142 void drop_labels(segment& code) { +143 for (int i = 0; i < SIZE(code.lines); ++i) { +144 line& inst = code.lines.at(i); +145 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label); +146 inst.words.erase(new_end, inst.words.end()); +147 } +148 } +149 +150 bool is_label(const word& w) { +151 return *w.data.rbegin() == ':'; +152 } +153 +154 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) { +155 int32_t byte_index_next_instruction_starts_at = 0; +156 for (int i = 0; i < 
SIZE(code.lines); ++i) { +157 line& inst = code.lines.at(i); +158 byte_index_next_instruction_starts_at += num_bytes(inst); +159 line new_inst; +160 for (int j = 0; j < SIZE(inst.words); ++j) { +161 const word& curr = inst.words.at(j); +162 if (contains_key(byte_index, curr.data)) { +163 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at; +164 if (has_operand_metadata(curr, "disp8")) { +165 if (displacement > 0xff || displacement < -0x7f) +166 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end(); +167 else +168 emit_hex_bytes(new_inst, displacement, 1); +169 } +170 else if (has_operand_metadata(curr, "disp16")) { +171 if (displacement > 0xffff || displacement < -0x7fff) +172 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end(); +173 else +174 emit_hex_bytes(new_inst, displacement, 2); +175 } +176 else if (has_operand_metadata(curr, "disp32")) { +177 emit_hex_bytes(new_inst, displacement, 4); +178 } +179 } +180 else { +181 new_inst.words.push_back(curr); +182 } +183 } +184 inst.words.swap(new_inst.words); +185 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); +186 } +187 } +188 +189 string data_to_string(const line& inst) { +190 ostringstream out; +191 for (int i = 0; i < SIZE(inst.words); ++i) { +192 if (i > 0) out << ' '; +193 out << inst.words.at(i).data; +194 } +195 return out.str(); +196 } +197 +198 string drop_last(const string& s) { +199 return string(s.begin(), --s.end()); +200 } +201 +202 //: Label definitions must be the first word on a line. No jumping inside +203 //: instructions. +204 //: They should also be the only word on a line. +205 //: However, you can absolutely have multiple labels map to the same address, +206 //: as long as they're on separate lines. 
+207 +208 :(scenario multiple_labels_at) +209 == 0x1 +210 # instruction effective address operand displacement immediate +211 # op subop mod rm32 base index scale r32 +212 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +213 # address 1 +214 loop: +215 $loop2: +216 # address 1 (labels take up no space) 217 05 0x0d0c0b0a/imm32 # add to EAX -218 +error: '0xab' looks like a hex number; use a different name -219 -220 :(scenario label_negative_hex) -221 % Hide_errors = true; -222 == 0x1 -223 # instruction effective address operand displacement immediate -224 # op subop mod rm32 base index scale r32 -225 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -226 -a: # indent to avoid looking like a trace_should_not_contain command for this scenario -227 05 0x0d0c0b0a/imm32 # add to EAX -228 +error: '-a' starts with '-', which can be confused with a negative number; use a different name -229 -230 //: now that we have labels, we need to adjust segment size computation to -231 //: ignore them. 
-232 -233 :(scenario segment_size_ignores_labels) -234 % Mem_offset = CODE_START; -235 == code # 0x08048074 -236 05/add 0x0d0c0b0a/imm32 # 5 bytes -237 foo: # 0 bytes -238 == data # 0x08049079 -239 bar: -240 00 -241 +transform: segment 1 begins at address 0x08049079 -242 -243 :(before "End num_bytes(curr) Special-cases") -244 else if (is_label(curr)) -245 ; // don't count it +218 # address 6 +219 eb $loop2/disp8 +220 # address 8 +221 eb $loop3/disp8 +222 # address 0xa +223 $loop3: +224 +transform: label 'loop' is at address 1 +225 +transform: label '$loop2' is at address 1 +226 +transform: label '$loop3' is at address a +227 # first jump is to -7 +228 +transform: instruction after transform: 'eb f9' +229 # second jump is to 0 (fall through) +230 +transform: instruction after transform: 'eb 00' +231 +232 :(scenario label_too_short) +233 % Hide_errors = true; +234 == 0x1 +235 # instruction effective address operand displacement immediate +236 # op subop mod rm32 base index scale r32 +237 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +238 xz: +239 05 0x0d0c0b0a/imm32 # add to EAX +240 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name +241 +242 :(scenario label_hex) +243 % Hide_errors = true; +244 == 0x1 +245 # instruction effective address operand displacement immediate +246 # op subop mod rm32 base index scale r32 +247 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +248 0xab: +249 05 0x0d0c0b0a/imm32 # add to EAX +250 +error: '0xab' looks like a hex number; use a different name +251 +252 :(scenario label_negative_hex) +253 % Hide_errors = true; +254 == 0x1 +255 # instruction effective address operand displacement immediate +256 # op subop mod rm32 base index scale r32 +257 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +258 -a: # indent to avoid looking like a trace_should_not_contain 
command for this scenario +259 05 0x0d0c0b0a/imm32 # add to EAX +260 +error: '-a' starts with '-', which can be confused with a negative number; use a different name +261 +262 //: now that we have labels, we need to adjust segment size computation to +263 //: ignore them. +264 +265 :(scenario segment_size_ignores_labels) +266 == code # 0x08048074 +267 05/add 0x0d0c0b0a/imm32 # 5 bytes +268 foo: # 0 bytes +269 == data # 0x08049079 +270 bar: +271 00 +272 +transform: segment 1 begins at address 0x08049079 +273 +274 :(before "End num_bytes(curr) Special-cases") +275 else if (is_label(curr)) +276 ; // don't count it diff --git a/html/subx/036global_variables.cc.html b/html/subx/036global_variables.cc.html index e4b6f75b..f5e53a17 100644 --- a/html/subx/036global_variables.cc.html +++ b/html/subx/036global_variables.cc.html @@ -21,12 +21,12 @@ a:hover { text-decoration: underline; } .LineNr { color: #444444; } .Constant { color: #00a0a0; } .Delimiter { color: #800080; } -.Special { color: #c00000; } +.SalientComment { color: #00ffff; } .Identifier { color: #c0a020; } .Normal { color: #aaaaaa; background-color: #080808; padding-bottom: 1px; } .Comment { color: #9090ff; } .Comment a { color:#0000ee; text-decoration:underline; } -.SalientComment { color: #00ffff; } +.Special { color: #c00000; } .PreProc { color: #800080; } --> @@ -68,84 +68,84 @@ if ('onhashchange' in window) { 4 //: However, they can only be used in imm32 and not disp32 operands. And they 5 //: can't be used with jump and call instructions. 6 //: - 7 //: This layer much the same structure as rewriting labels. + 7 //: This layer has much the same structure as rewriting labels. 
8 9 :(scenario global_variable) - 10 % Mem_offset = CODE_START; - 11 % Mem.resize(0x2000); - 12 == code - 13 b9/copy x/imm32 # copy to ECX - 14 == data - 15 x: - 16 00 00 00 00 - 17 +transform: global variable 'x' is at address 0x08049079 - 18 - 19 :(before "End Level-2 Transforms") - 20 Transform.push_back(rewrite_global_variables); - 21 :(code) - 22 void rewrite_global_variables(program& p) { - 23 trace(99, "transform") << "-- rewrite global variables" << end(); - 24 map<string, uint32_t> address; - 25 compute_addresses_for_global_variables(p, address); - 26 if (trace_contains_errors()) return; - 27 drop_global_variables(p); - 28 replace_global_variables_with_addresses(p, address); - 29 } - 30 - 31 void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) { - 32 for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) - 33 compute_addresses_for_global_variables(p.segments.at(i), address); - 34 } - 35 - 36 void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) { - 37 int current_address = s.start; - 38 for (int i = 0; i < SIZE(s.lines); ++i) { - 39 const line& inst = s.lines.at(i); - 40 for (int j = 0; j < SIZE(inst.words); ++j) { - 41 const word& curr = inst.words.at(j); - 42 if (*curr.data.rbegin() != ':') { - 43 ++current_address; - 44 } - 45 else { - 46 string variable = drop_last(curr.data); - 47 // ensure variables look sufficiently different from raw hex - 48 check_valid_name(variable); - 49 if (trace_contains_errors()) return; - 50 if (j > 0) - 51 raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end(); - 52 put(address, variable, current_address); - 53 trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end(); - 54 // no modifying current_address; global variable definitions won't be in the final binary - 55 } - 56 } - 57 } - 58 } - 59 - 60 void 
drop_global_variables(program& p) { - 61 for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) - 62 drop_labels(p.segments.at(i)); - 63 } - 64 - 65 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) { - 66 if (p.segments.empty()) return; - 67 segment& code = p.segments.at(0); - 68 for (int i = 0; i < SIZE(code.lines); ++i) { - 69 line& inst = code.lines.at(i); - 70 line new_inst; - 71 for (int j = 0; j < SIZE(inst.words); ++j) { - 72 const word& curr = inst.words.at(j); - 73 if (!contains_key(address, curr.data)) { + 10 == code + 11 b9/copy x/imm32 # copy to ECX + 12 == data + 13 x: + 14 00 00 00 00 + 15 +transform: global variable 'x' is at address 0x08049079 + 16 + 17 :(before "End Level-2 Transforms") + 18 Transform.push_back(rewrite_global_variables); + 19 :(code) + 20 void rewrite_global_variables(program& p) { + 21 trace(99, "transform") << "-- rewrite global variables" << end(); + 22 map<string, uint32_t> address; + 23 compute_addresses_for_global_variables(p, address); + 24 if (trace_contains_errors()) return; + 25 drop_global_variables(p); + 26 replace_global_variables_with_addresses(p, address); + 27 } + 28 + 29 void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) { + 30 for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) + 31 compute_addresses_for_global_variables(p.segments.at(i), address); + 32 } + 33 + 34 void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) { + 35 int current_address = s.start; + 36 for (int i = 0; i < SIZE(s.lines); ++i) { + 37 const line& inst = s.lines.at(i); + 38 for (int j = 0; j < SIZE(inst.words); ++j) { + 39 const word& curr = inst.words.at(j); + 40 if (*curr.data.rbegin() != ':') { + 41 ++current_address; + 42 } + 43 else { + 44 string variable = drop_last(curr.data); + 45 // ensure variables look sufficiently different from raw hex + 46 check_valid_name(variable); + 47 if 
(trace_contains_errors()) return; + 48 if (j > 0) + 49 raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end(); + 50 put(address, variable, current_address); + 51 trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end(); + 52 // no modifying current_address; global variable definitions won't be in the final binary + 53 } + 54 } + 55 } + 56 } + 57 + 58 void drop_global_variables(program& p) { + 59 for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) + 60 drop_labels(p.segments.at(i)); + 61 } + 62 + 63 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) { + 64 if (p.segments.empty()) return; + 65 segment& code = p.segments.at(0); + 66 for (int i = 0; i < SIZE(code.lines); ++i) { + 67 line& inst = code.lines.at(i); + 68 line new_inst; + 69 for (int j = 0; j < SIZE(inst.words); ++j) { + 70 const word& curr = inst.words.at(j); + 71 if (!contains_key(address, curr.data)) { + 72 if (!looks_like_hex_int(curr.data)) + 73 raise << "missing reference to global '" << curr.data << "'\n" << end(); 74 new_inst.words.push_back(curr); 75 continue; 76 } 77 if (!valid_use_of_global_variable(curr)) { - 78 raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end(); + 78 raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end(); 79 return; 80 } - 81 emit_hex_bytes(new_inst, get(address, curr.data), 4); + 81 emit_hex_bytes(new_inst, get(address, curr.data), 4); 82 } 83 inst.words.swap(new_inst.words); - 84 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); + 84 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); 85 } 86 } 87 @@ -159,7 +159,7 @@ if ('onhashchange' in window) { 95 //: requires first saving some data early before we 
pack operands 96 97 :(after "Begin Level-2 Transforms") - 98 Transform.push_back(correlate_disp32_with_mod); + 98 Transform.push_back(correlate_disp32_with_mod); 99 :(code) 100 void correlate_disp32_with_mod(program& p) { 101 if (p.segments.empty()) return; @@ -192,7 +192,7 @@ if ('onhashchange' in window) { 128 % Hide_errors = true; 129 == code 130 eb/jump x/disp8 -131 == data +131 == data 132 x: 133 00 00 00 00 134 +error: 'eb/jump x/disp8': can't refer to global variable 'x' @@ -203,7 +203,7 @@ if ('onhashchange' in window) { 139 % Hide_errors = true; 140 == code 141 e8/call x/disp32 -142 == data +142 == data 143 x: 144 00 00 00 00 145 +error: 'e8/call x/disp32': can't refer to global variable 'x' @@ -211,34 +211,32 @@ if ('onhashchange' in window) { 147 #? +error: can't call to the data segment ('x') 148 149 :(scenario disp32_data_with_modrm) -150 % Mem_offset = CODE_START; -151 % Mem.resize(0x2000); -152 == code -153 8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32 -154 == -155 x: -156 00 00 00 00 -157 $error: 0 -158 -159 :(scenarios transform) -160 :(scenario disp32_data_with_call) -161 == code -162 foo: -163 e8/call bar/disp32 -164 bar: -165 $error: 0 -166 -167 :(code) -168 string to_full_string(const line& in) { -169 ostringstream out; -170 for (int i = 0; i < SIZE(in.words); ++i) { -171 if (i > 0) out << ' '; -172 out << in.words.at(i).data; -173 for (int j = 0; j < SIZE(in.words.at(i).metadata); ++j) -174 out << '/' << in.words.at(i).metadata.at(j); -175 } -176 return out.str(); -177 } +150 == code +151 8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32 +152 == data +153 x: +154 00 00 00 00 +155 $error: 0 +156 +157 :(scenarios transform) +158 :(scenario disp32_data_with_call) +159 == code +160 foo: +161 e8/call bar/disp32 +162 bar: +163 $error: 0 +164 +165 :(code) +166 string to_full_string(const line& in) { +167 ostringstream out; +168 for (int i = 0; i < SIZE(in.words); ++i) { +169 if (i > 0) out << ' '; +170 out << in.words.at(i).data; 
+171 for (int j = 0; j < SIZE(in.words.at(i).metadata); ++j) +172 out << '/' << in.words.at(i).metadata.at(j); +173 } +174 return out.str(); +175 } diff --git a/html/subx/038---literal_strings.cc.html b/html/subx/038---literal_strings.cc.html index 16a42c35..25066a8b 100644 --- a/html/subx/038---literal_strings.cc.html +++ b/html/subx/038---literal_strings.cc.html @@ -17,10 +17,9 @@ a:hover { text-decoration: underline; } * { font-size: 12pt; font-size: 1em; } .cSpecial { color: #008000; } .LineNr { color: #444444; } -.traceAbsent { color: #c00000; } .Constant { color: #00a0a0; } .Delimiter { color: #800080; } -.Special { color: #c00000; } +.traceAbsent { color: #c00000; } .Identifier { color: #c0a020; } .Normal { color: #aaaaaa; background-color: #080808; padding-bottom: 1px; } .Comment { color: #9090ff; } @@ -66,215 +65,214 @@ if ('onhashchange' in window) { 4 //: always be the second segment). 5 6 :(scenario transform_literal_string) - 7 % Mem_offset = CODE_START; - 8 % Mem.resize(AFTER_STACK - CODE_START); - 9 == code - 10 b8/copy "test"/imm32 # copy to EAX - 11 +transform: -- move literal strings to data segment - 12 +transform: adding global variable '__subx_global_1' containing "test" - 13 +transform: instruction after transform: 'b8 __subx_global_1' - 14 - 15 //: We don't rely on any transforms running in previous layers, but this layer - 16 //: knows about labels and global variables and will emit them for previous - 17 //: layers to transform. 
- 18 :(after "Begin Transforms") - 19 // Begin Level-3 Transforms - 20 Transform.push_back(transform_literal_strings); - 21 // End Level-3 Transforms - 22 - 23 :(before "End Globals") - 24 int Next_auto_global = 1; - 25 :(code) - 26 void transform_literal_strings(program& p) { - 27 trace(99, "transform") << "-- move literal strings to data segment" << end(); - 28 if (p.segments.empty()) return; - 29 segment& code = p.segments.at(0); - 30 segment data; - 31 for (int i = 0; i < SIZE(code.lines); ++i) { - 32 line& inst = code.lines.at(i); - 33 for (int j = 0; j < SIZE(inst.words); ++j) { - 34 word& curr = inst.words.at(j); - 35 if (curr.data.at(0) != '"') continue; - 36 ostringstream global_name; - 37 global_name << "__subx_global_" << Next_auto_global; - 38 ++Next_auto_global; - 39 add_global_to_data_segment(global_name.str(), curr, data); - 40 curr.data = global_name.str(); - 41 } - 42 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); - 43 } - 44 if (data.lines.empty()) return; - 45 if (SIZE(p.segments) < 2) { - 46 p.segments.resize(2); - 47 p.segments.at(1).lines.swap(data.lines); - 48 } - 49 vector<line>& existing_data = p.segments.at(1).lines; - 50 existing_data.insert(existing_data.end(), data.lines.begin(), data.lines.end()); - 51 } - 52 - 53 void add_global_to_data_segment(const string& name, const word& value, segment& data) { - 54 trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end(); - 55 // emit label - 56 data.lines.push_back(label(name)); - 57 // emit size for size-prefixed array - 58 data.lines.push_back(line()); - 59 emit_hex_bytes(data.lines.back(), SIZE(value.data)-/*skip quotes*/2, 4/*bytes*/); - 60 // emit data byte by byte - 61 data.lines.push_back(line()); - 62 line& curr = data.lines.back(); - 63 for (int i = /*skip start quote*/1; i < SIZE(value.data)-/*skip end quote*/1; ++i) { - 64 char c = value.data.at(i); - 65 curr.words.push_back(word()); - 
66 curr.words.back().data = hex_byte_to_string(c); - 67 curr.words.back().metadata.push_back(string(1, c)); - 68 } - 69 } - 70 - 71 line label(string s) { - 72 line result; - 73 result.words.push_back(word()); - 74 result.words.back().data = (s+":"); - 75 return result; - 76 } - 77 - 78 //: Within strings, whitespace is significant. So we need to redo our instruction - 79 //: parsing. - 80 - 81 :(scenarios parse_instruction_character_by_character) - 82 :(scenario instruction_with_string_literal) - 83 a "abc def" z # two spaces inside string - 84 +parse2: word: a - 85 +parse2: word: "abc def" - 86 +parse2: word: z - 87 # no other words - 88 $parse2: 3 - 89 - 90 :(before "End Line Parsing Special-cases(line_data -> l)") - 91 if (line_data.find('"') != string::npos) { // can cause false-positives, but we can handle them - 92 parse_instruction_character_by_character(line_data, l); - 93 continue; - 94 } - 95 - 96 :(code) - 97 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) { - 98 // parse literals - 99 istringstream in(line_data); -100 in >> std::noskipws; -101 line result; -102 // add tokens (words or strings) one by one -103 while (has_data(in)) { -104 skip_whitespace(in); -105 if (!has_data(in)) break; -106 char c = in.get(); -107 if (c == '#') break; // comment; drop rest of line -108 if (c == ':') break; // line metadata; skip for now -109 if (c == '.') { -110 if (!has_data(in)) break; // comment token at end of line -111 if (isspace(in.peek())) -112 continue; // '.' 
followed by space is comment token; skip -113 } -114 ostringstream w; -115 w << c; -116 if (c == '"') { -117 // slurp until '"' -118 while (has_data(in)) { -119 in >> c; -120 w << c; -121 if (c == '"') break; -122 } -123 } -124 // slurp any remaining characters until whitespace -125 while (!isspace(in.peek()) && has_data(in)) { // peek can sometimes trigger eof(), so do it first -126 in >> c; -127 w << c; -128 } -129 result.words.push_back(word()); -130 parse_word(w.str(), result.words.back()); -131 trace(99, "parse2") << "word: " << to_string(result.words.back()) << end(); -132 } -133 if (!result.words.empty()) -134 out.push_back(result); -135 } -136 -137 void skip_whitespace(istream& in) { -138 while (true) { -139 if (has_data(in) && isspace(in.peek())) in.get(); -140 else break; -141 } -142 } -143 -144 void skip_comment(istream& in) { -145 if (has_data(in) && in.peek() == '#') { -146 in.get(); -147 while (has_data(in) && in.peek() != '\n') in.get(); -148 } -149 } -150 -151 // helper for tests -152 void parse_instruction_character_by_character(const string& line_data) { -153 vector<line> out; -154 parse_instruction_character_by_character(line_data, out); -155 } -156 -157 :(scenario parse2_comment_token_in_middle) -158 a . z -159 +parse2: word: a -160 +parse2: word: z -161 -parse2: word: . -162 # no other words -163 $parse2: 2 -164 -165 :(scenario parse2_word_starting_with_dot) -166 a .b c -167 +parse2: word: a -168 +parse2: word: .b -169 +parse2: word: c -170 -171 :(scenario parse2_comment_token_at_start) -172 . a b -173 +parse2: word: a -174 +parse2: word: b -175 -parse2: word: . -176 -177 :(scenario parse2_comment_token_at_end) -178 a b . -179 +parse2: word: a -180 +parse2: word: b -181 -parse2: word: . 
-182 -183 :(scenario parse2_word_starting_with_dot_at_start) -184 .a b c -185 +parse2: word: .a -186 +parse2: word: b -187 +parse2: word: c -188 -189 :(scenario parse2_metadata) -190 .a b/c d -191 +parse2: word: .a -192 +parse2: word: b /c -193 +parse2: word: d -194 -195 :(scenario parse2_string_with_metadata) -196 a "bc def"/disp32 g -197 +parse2: word: a -198 +parse2: word: "bc def" /disp32 -199 +parse2: word: g -200 -201 :(scenario parse2_string_with_metadata_at_end) -202 a "bc def"/disp32 -203 +parse2: word: a -204 +parse2: word: "bc def" /disp32 -205 -206 :(code) -207 void test_parse2_string_with_metadata_at_end_of_line_without_newline() { -208 parse_instruction_character_by_character( -209 "68/push \"test\"/f" // no newline, which is how calls from parse() will look -210 ); -211 CHECK_TRACE_CONTENTS( -212 "parse2: word: 68 /push^D" -213 "parse2: word: \"test\" /f^D" -214 ); -215 } + 7 == code + 8 b8/copy "test"/imm32 # copy to EAX + 9 == data # need to manually create this for now + 10 +transform: -- move literal strings to data segment + 11 +transform: adding global variable '__subx_global_1' containing "test" + 12 +transform: instruction after transform: 'b8 __subx_global_1' + 13 + 14 //: We don't rely on any transforms running in previous layers, but this layer + 15 //: knows about labels and global variables and will emit them for previous + 16 //: layers to transform. 
+ 17 :(after "Begin Transforms") + 18 // Begin Level-3 Transforms + 19 Transform.push_back(transform_literal_strings); + 20 // End Level-3 Transforms + 21 + 22 :(before "End Globals") + 23 int Next_auto_global = 1; + 24 :(code) + 25 void transform_literal_strings(program& p) { + 26 trace(99, "transform") << "-- move literal strings to data segment" << end(); + 27 if (p.segments.empty()) return; + 28 segment& code = p.segments.at(0); + 29 segment data; + 30 for (int i = 0; i < SIZE(code.lines); ++i) { + 31 line& inst = code.lines.at(i); + 32 for (int j = 0; j < SIZE(inst.words); ++j) { + 33 word& curr = inst.words.at(j); + 34 if (curr.data.at(0) != '"') continue; + 35 ostringstream global_name; + 36 global_name << "__subx_global_" << Next_auto_global; + 37 ++Next_auto_global; + 38 add_global_to_data_segment(global_name.str(), curr, data); + 39 curr.data = global_name.str(); + 40 } + 41 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); + 42 } + 43 if (data.lines.empty()) return; + 44 if (SIZE(p.segments) < 2) { + 45 p.segments.resize(2); + 46 p.segments.at(1).lines.swap(data.lines); + 47 } + 48 vector<line>& existing_data = p.segments.at(1).lines; + 49 existing_data.insert(existing_data.end(), data.lines.begin(), data.lines.end()); + 50 } + 51 + 52 void add_global_to_data_segment(const string& name, const word& value, segment& data) { + 53 trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end(); + 54 // emit label + 55 data.lines.push_back(label(name)); + 56 // emit size for size-prefixed array + 57 data.lines.push_back(line()); + 58 emit_hex_bytes(data.lines.back(), SIZE(value.data)-/*skip quotes*/2, 4/*bytes*/); + 59 // emit data byte by byte + 60 data.lines.push_back(line()); + 61 line& curr = data.lines.back(); + 62 for (int i = /*skip start quote*/1; i < SIZE(value.data)-/*skip end quote*/1; ++i) { + 63 char c = value.data.at(i); + 64 curr.words.push_back(word()); + 
65 curr.words.back().data = hex_byte_to_string(c); + 66 curr.words.back().metadata.push_back(string(1, c)); + 67 } + 68 } + 69 + 70 line label(string s) { + 71 line result; + 72 result.words.push_back(word()); + 73 result.words.back().data = (s+":"); + 74 return result; + 75 } + 76 + 77 //: Within strings, whitespace is significant. So we need to redo our instruction + 78 //: parsing. + 79 + 80 :(scenarios parse_instruction_character_by_character) + 81 :(scenario instruction_with_string_literal) + 82 a "abc def" z # two spaces inside string + 83 +parse2: word: a + 84 +parse2: word: "abc def" + 85 +parse2: word: z + 86 # no other words + 87 $parse2: 3 + 88 + 89 :(before "End Line Parsing Special-cases(line_data -> l)") + 90 if (line_data.find('"') != string::npos) { // can cause false-positives, but we can handle them + 91 parse_instruction_character_by_character(line_data, l); + 92 continue; + 93 } + 94 + 95 :(code) + 96 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) { + 97 // parse literals + 98 istringstream in(line_data); + 99 in >> std::noskipws; +100 line result; +101 // add tokens (words or strings) one by one +102 while (has_data(in)) { +103 skip_whitespace(in); +104 if (!has_data(in)) break; +105 char c = in.get(); +106 if (c == '#') break; // comment; drop rest of line +107 if (c == ':') break; // line metadata; skip for now +108 if (c == '.') { +109 if (!has_data(in)) break; // comment token at end of line +110 if (isspace(in.peek())) +111 continue; // '.' 
followed by space is comment token; skip +112 } +113 ostringstream w; +114 w << c; +115 if (c == '"') { +116 // slurp until '"' +117 while (has_data(in)) { +118 in >> c; +119 w << c; +120 if (c == '"') break; +121 } +122 } +123 // slurp any remaining characters until whitespace +124 while (!isspace(in.peek()) && has_data(in)) { // peek can sometimes trigger eof(), so do it first +125 in >> c; +126 w << c; +127 } +128 result.words.push_back(word()); +129 parse_word(w.str(), result.words.back()); +130 trace(99, "parse2") << "word: " << to_string(result.words.back()) << end(); +131 } +132 if (!result.words.empty()) +133 out.push_back(result); +134 } +135 +136 void skip_whitespace(istream& in) { +137 while (true) { +138 if (has_data(in) && isspace(in.peek())) in.get(); +139 else break; +140 } +141 } +142 +143 void skip_comment(istream& in) { +144 if (has_data(in) && in.peek() == '#') { +145 in.get(); +146 while (has_data(in) && in.peek() != '\n') in.get(); +147 } +148 } +149 +150 // helper for tests +151 void parse_instruction_character_by_character(const string& line_data) { +152 vector<line> out; +153 parse_instruction_character_by_character(line_data, out); +154 } +155 +156 :(scenario parse2_comment_token_in_middle) +157 a . z +158 +parse2: word: a +159 +parse2: word: z +160 -parse2: word: . +161 # no other words +162 $parse2: 2 +163 +164 :(scenario parse2_word_starting_with_dot) +165 a .b c +166 +parse2: word: a +167 +parse2: word: .b +168 +parse2: word: c +169 +170 :(scenario parse2_comment_token_at_start) +171 . a b +172 +parse2: word: a +173 +parse2: word: b +174 -parse2: word: . +175 +176 :(scenario parse2_comment_token_at_end) +177 a b . +178 +parse2: word: a +179 +parse2: word: b +180 -parse2: word: . 
+181 +182 :(scenario parse2_word_starting_with_dot_at_start) +183 .a b c +184 +parse2: word: .a +185 +parse2: word: b +186 +parse2: word: c +187 +188 :(scenario parse2_metadata) +189 .a b/c d +190 +parse2: word: .a +191 +parse2: word: b /c +192 +parse2: word: d +193 +194 :(scenario parse2_string_with_metadata) +195 a "bc def"/disp32 g +196 +parse2: word: a +197 +parse2: word: "bc def" /disp32 +198 +parse2: word: g +199 +200 :(scenario parse2_string_with_metadata_at_end) +201 a "bc def"/disp32 +202 +parse2: word: a +203 +parse2: word: "bc def" /disp32 +204 +205 :(code) +206 void test_parse2_string_with_metadata_at_end_of_line_without_newline() { +207 parse_instruction_character_by_character( +208 "68/push \"test\"/f" // no newline, which is how calls from parse() will look +209 ); +210 CHECK_TRACE_CONTENTS( +211 "parse2: word: 68 /push^D" +212 "parse2: word: \"test\" /f^D" +213 ); +214 } diff --git a/html/subx/040---tests.cc.html b/html/subx/040---tests.cc.html index 52be02b3..b51870bc 100644 --- a/html/subx/040---tests.cc.html +++ b/html/subx/040---tests.cc.html @@ -66,7 +66,7 @@ if ('onhashchange' in window) { 4 //: knows about labels and will emit labels for previous layers to transform. 
5 :(after "Begin Transforms") 6 // Begin Level-4 Transforms - 7 Transform.push_back(create_test_function); + 7 Transform.push_back(create_test_function); 8 // End Level-4 Transforms 9 10 :(scenario run_test) @@ -87,21 +87,21 @@ if ('onhashchange' in window) { 25 void create_test_function(program& p) { 26 if (p.segments.empty()) return; 27 segment& code = p.segments.at(0); -28 trace(99, "transform") << "-- create 'run_tests'" << end(); +28 trace(99, "transform") << "-- create 'run_tests'" << end(); 29 vector<line> new_insts; 30 for (int i = 0; i < SIZE(code.lines); ++i) { 31 line& inst = code.lines.at(i); 32 for (int j = 0; j < SIZE(inst.words); ++j) { 33 const word& curr = inst.words.at(j); -34 if (*curr.data.rbegin() != ':') continue; // not a label -35 if (!starts_with(curr.data, "test_")) continue; -36 string fn = drop_last(curr.data); +34 if (*curr.data.rbegin() != ':') continue; // not a label +35 if (!starts_with(curr.data, "test_")) continue; +36 string fn = drop_last(curr.data); 37 new_insts.push_back(call(fn)); 38 } 39 } 40 if (new_insts.empty()) return; // no tests found 41 code.lines.push_back(label("run_tests")); -42 code.lines.insert(code.lines.end(), new_insts.begin(), new_insts.end()); +42 code.lines.insert(code.lines.end(), new_insts.begin(), new_insts.end()); 43 code.lines.push_back(ret()); 44 } 45 @@ -127,14 +127,14 @@ if ('onhashchange' in window) { 65 66 word call() { 67 word result; -68 result.data = "e8"; +68 result.data = "e8"; 69 result.metadata.push_back("call"); 70 return result; 71 } 72 73 word disp32(string s) { 74 word result; -75 result.data = s; +75 result.data = s; 76 result.metadata.push_back("disp32"); 77 return result; 78 } @@ -142,7 +142,7 @@ if ('onhashchange' in window) { 80 line ret() { 81 line result; 82 result.words.push_back(word()); -83 result.words.back().data = "c3"; +83 result.words.back().data = "c3"; 84 result.words.back().metadata.push_back("return"); 85 return result; 86 } diff --git 
a/html/subx/050write_stderr.subx.html b/html/subx/050write_stderr.subx.html new file mode 100644 index 00000000..63fb619c --- /dev/null +++ b/html/subx/050write_stderr.subx.html @@ -0,0 +1,97 @@ + + + + +Mu - subx/050write_stderr.subx + + + + + + + + + + +
+ 1 == code
+ 2 
+ 3 # instruction                     effective address                                                   operand     displacement    immediate
+ 4 # op          subop               mod             rm32          base        index         scale       r32
+ 5 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+ 6 
+ 7 write_stderr:  # s : (address array byte) -> <void>
+ 8   # prolog
+ 9   55/push-EBP
+10   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+11   # save registers
+12   50/push-EAX
+13   51/push-ECX
+14   52/push-EDX
+15   53/push-EBX
+16   # write(2/stderr, (data) s+4, (size) *s)
+17     # fd = 2 (stderr)
+18   bb/copy                         .               .             .           .             .           .           .               2/imm32           # copy to EBX
+19     # x = s+4
+20   8b/copy                         1/mod/*+disp8   4/rm32/SIB    5/base/EBP  4/index/none  .           1/r32/ECX   8/disp8         .                 # copy *(EBP+8) to ECX
+21   81          0/subop/add         3/mod/direct    1/rm32/ECX    .           .             .           .           .               4/imm32           # add to ECX
+22     # size = *s
+23   8b/copy                         1/mod/*+disp8   4/rm32/SIB    5/base/EBP  4/index/none  .           2/r32/EDX   8/disp8         .                 # copy *(EBP+8) to EDX
+24   8b/copy                         0/mod/indirect  2/rm32/EDX    .           .             .           2/r32/EDX   .               .                 # copy *EDX to EDX
+25     # call write()
+26   b8/copy                         .               .             .           .             .           .           .               4/imm32/write     # copy to EAX
+27   cd/syscall  0x80/imm8
+28   # restore registers
+29   5b/pop-to-EBX
+30   5a/pop-to-EDX
+31   59/pop-to-ECX
+32   58/pop-to-EAX
+33   # end
+34   89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
+35   5d/pop-to-EBP
+36   c3/return
+37 
+38 # vim:ft=subx:nowrap:tw&
+
+ + + diff --git a/html/subx/051test.subx.html b/html/subx/051test.subx.html new file mode 100644 index 00000000..225e011d --- /dev/null +++ b/html/subx/051test.subx.html @@ -0,0 +1,124 @@ + + + + +Mu - subx/051test.subx + + + + + + + + + + +
+ 1 == code
+ 2 
+ 3 # instruction                     effective address                                                   operand     displacement    immediate
+ 4 # op          subop               mod             rm32          base        index         scale       r32
+ 5 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+ 6 
+ 7 # print msg to stderr if a != b, otherwise print "."
+ 8 check_ints_equal:  # (a : int, b : int, msg : (address array byte)) -> boolean
+ 9   # prolog
+10   55/push-EBP
+11   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+12   # save registers
+13   51/push-ECX
+14   53/push-EBX
+15   # load args into EAX, EBX and ECX
+16   8b/copy                         1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           0/r32/EAX   0x8/disp8       .                 # copy *(EBP+8) to EAX
+17   8b/copy                         1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           3/r32/EBX   0xc/disp8       .                 # copy *(EBP+12) to EBX
+18   # if EAX == b/EBX
+19   39/compare                      3/mod/direct    0/rm32/EAX    .           .             .           3/r32/EBX   .               .                 # compare EAX and EBX
+20   75/jump-if-unequal  $check_ints_equal:else/disp8
+21     # print('.')
+22       # push args
+23   68/push  "."/imm32
+24       # call
+25   e8/call  write_stderr/disp32
+26       # discard arg
+27   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
+28     # return
+29   eb/jump  $check_ints_equal:end/disp8
+30   # else:
+31 $check_ints_equal:else:
+32   # copy msg into ECX
+33   8b/copy                         1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           1/r32/ECX   0x10/disp8       .                # copy *(EBP+16) to ECX
+34     # print(ECX)
+35       # push args
+36   51/push-ECX
+37       # call
+38   e8/call  write_stderr/disp32
+39       # discard arg
+40   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
+41     # print newline
+42       # push args
+43   68/push  Newline/imm32
+44       # call
+45   e8/call  write_stderr/disp32
+46       # discard arg
+47   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
+48 $check_ints_equal:end:
+49   # restore registers
+50   5b/pop-to-EBX
+51   59/pop-to-ECX
+52   # end
+53   89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
+54   5d/pop-to-EBP
+55   c3/return
+56 
+57 == data
+58 
+59 Newline:
+60   # size
+61   01 00 00 00
+62   # data
+63   0a/newline
+64 
+65 # vim:ft=subx:nowrap:tw&
+
+ + + diff --git a/html/subx/052kernel_string_equal.subx.html b/html/subx/052kernel_string_equal.subx.html new file mode 100644 index 00000000..5bf5d1d5 --- /dev/null +++ b/html/subx/052kernel_string_equal.subx.html @@ -0,0 +1,320 @@ + + + + +Mu - subx/052kernel_string_equal.subx + + + + + + + + + + +
+  1 ## Checking null-terminated ascii strings.
+  2 #
+  3 # By default we create strings with a 4-byte length prefix rather than a null suffix.
+  4 # However we still need null-terminated strings when interacting with the Linux
+  5 # kernel in a few places. This layer implements a function for comparing
+  6 # a null-terminated 'kernel string' with a length-prefixed 'SubX string'.
+  7 #
+  8 # To run (from the subx directory):
+  9 #   $ subx translate 05[0-2]*.subx -o /tmp/tmp52
+ 10 #   $ subx run /tmp/tmp52  # runs a series of tests
+ 11 #   ......  # all tests pass
+ 12 #
+ 13 # (We can't yet run the tests when given a "test" commandline argument,
+ 14 # because checking for it would require the function being tested! Breakage
+ 15 # would cause tests to not run, rather than to fail as we'd like.)
+ 16 
+ 17 == code
+ 18 
+ 19 # instruction                     effective address                                                   operand     displacement    immediate
+ 20 # op          subop               mod             rm32          base        index         scale       r32
+ 21 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+ 22 
+ 23 # main:  (if this is the last file loaded)
+ 24   e8/call  run_tests/disp32  # 'run_tests' is a function created automatically by SubX. It calls all functions that start with 'test_'.
+ 25   # exit(EAX)
+ 26   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                 # copy EAX to EBX
+ 27   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EAX
+ 28   cd/syscall  0x80/imm8
+ 29 
+ 30 # compare a null-terminated ascii string with a more idiomatic length-prefixed byte array
+ 31 # reason for the name: the only place we should have null-terminated ascii strings is from commandline args
+ 32 kernel_string_equal:  # s : null-terminated ascii string, benchmark : length-prefixed ascii string -> EAX : boolean
+ 33   # prolog
+ 34   55/push-EBP
+ 35   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+ 36   # save registers
+ 37   51/push-ECX
+ 38   52/push-EDX
+ 39   53/push-EBX
+ 40   56/push-ESI
+ 41   57/push-EDI
+ 42 
+ 43   # pseudocode:
+ 44   #   initialize n = b.length
+ 45   #   initialize s1 = s
+ 46   #   initialize s2 = b.data
+ 47   #   i = 0
+ 48   #   for (i = 0; i < n; ++i)
+ 49   #     c1 = *s1
+ 50   #     c2 = *s2
+ 51   #     if c1 == 0
+ 52   #       return false
+ 53   #     if c1 != c2
+ 54   #       return false
+ 55   #   return *s1 == 0
+ 56   # initialize s into EDI
+ 57   8b/copy                         1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           7/r32/EDI   8/disp8         .                 # copy *(EBP+8) to EDI
+ 58   # initialize benchmark length n into EDX
+ 59   8b/copy                         1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           2/r32/EDX   0xc/disp8       .                 # copy *(EBP+12) to EDX
+ 60   8b/copy                         0/mod/indirect  2/rm32/EDX    .           .             .           2/r32/EDX   .               .                 # copy *EDX to EDX
+ 61   # initialize benchmark data into ESI
+ 62   8b/copy                         1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           6/r32/ESI   0xc/disp8       .                 # copy *(EBP+12) to ESI
+ 63   81          0/subop/add         3/mod/direct    6/rm32/ESI    .           .             .           .           .               4/imm32           # add to ESI
+ 64   # initialize loop counter i into ECX
+ 65   b9/copy                         .               .             .           .             .           .           .               0/imm32/exit      # copy to ECX
+ 66   # while (i/ECX < n/EDX)
+ 67 $kernel_string_loop:
+ 68   39/compare                      3/mod/direct    1/rm32/ECX    .           .             .           2/r32/EDX   .               .                 # compare ECX with EDX
+ 69   74/jump-if-equal  $kernel_string_break/disp8
+ 70     # c1/EAX, c2/EBX = *s, *benchmark
+ 71   b8/copy  0/imm32  # clear EAX
+ 72   8a/copy                         0/mod/indirect  7/rm32/EDI    .           .             .           0/r32/EAX   .               .                 # copy byte at *EDI to lower byte of EAX
+ 73   bb/copy  0/imm32  # clear EBX
+ 74   8a/copy                         0/mod/indirect  6/rm32/ESI    .           .             .           3/r32/EBX   .               .                 # copy byte at *ESI to lower byte of EBX
+ 75     # if (c1 == 0) return false
+ 76   3d/compare-EAX  0/imm32
+ 77   74/jump-if-equal  $kernel_string_fail/disp8
+ 78     # if (c1 != c2) return false
+ 79   39/compare                      3/mod/direct    0/rm32/EAX    .           .             .           3/r32/EBX   .               .                 # compare EAX with EBX
+ 80   75/jump-if-not-equal  $kernel_string_fail/disp8
+ 81     # ++s1, ++s2, ++i
+ 82   41/inc-ECX
+ 83   46/inc-ESI
+ 84   47/inc-EDI
+ 85   # end while
+ 86   eb/jump  $kernel_string_loop/disp8
+ 87 $kernel_string_break:
+ 88   # if (*s/EDI == 0) return true
+ 89   b8/copy  0/imm32  # clear EAX
+ 90   8a/copy                         0/mod/indirect  7/rm32/EDI    .           .             .           0/r32/EAX   .               .                 # copy byte at *EDI to lower byte of EAX
+ 91   3d/compare-EAX  0/imm32
+ 92   75/jump-if-not-equal  $kernel_string_fail/disp8
+ 93   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EAX
+ 94   eb/jump  $kernel_string_end/disp8
+ 95   # return false
+ 96 $kernel_string_fail:
+ 97   b8/copy                         .               .             .           .             .           .           .               0/imm32           # copy to EAX
+ 98 
+ 99 $kernel_string_end:
+100   # restore registers
+101   5f/pop-to-EDI
+102   5e/pop-to-ESI
+103   5b/pop-to-EBX
+104   5a/pop-to-EDX
+105   59/pop-to-ECX
+106   # end
+107   89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
+108   5d/pop-to-EBP
+109   c3/return
+110 
+111 ## tests
+112 
+113 test_compare_null_kernel_string_with_empty_array:
+114   # EAX = kernel_string_equal(Null_kernel_string, "")
+115     # push args
+116   68/push  ""/imm32
+117   68/push  Null_kernel_string/imm32
+118     # call
+119   e8/call  kernel_string_equal/disp32
+120     # discard args
+121   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+122   # call check_ints_equal(EAX, 1, msg)
+123     # push args
+124   68/push  "F - test_compare_null_kernel_string_with_empty_array"/imm32
+125   68/push  1/imm32/true
+126   50/push-EAX
+127     # call
+128   e8/call  check_ints_equal/disp32
+129     # discard args
+130   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
+131   c3/return
+132 
+133 test_compare_null_kernel_string_with_non_empty_array:
+134   # EAX = kernel_string_equal(Null_kernel_string, "Abc")
+135     # push args
+136   68/push  "Abc"/imm32
+137   68/push  Null_kernel_string/imm32
+138     # call
+139   e8/call  kernel_string_equal/disp32
+140     # discard args
+141   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+142   # call check_ints_equal(EAX, 0, msg)
+143     # push args
+144   68/push  "F - test_compare_null_kernel_string_with_non_empty_array"/imm32
+145   68/push  0/imm32/false
+146   50/push-EAX
+147     # call
+148   e8/call  check_ints_equal/disp32
+149     # discard args
+150   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
+151   c3/return
+152 
+153 test_compare_kernel_string_with_equal_array:
+154   # EAX = kernel_string_equal(Abc_kernel_string, "Abc")
+155     # push args
+156   68/push  "Abc"/imm32
+157   68/push  Abc_kernel_string/imm32
+158     # call
+159   e8/call  kernel_string_equal/disp32
+160     # discard args
+161   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+162   # call check_ints_equal(EAX, 1, msg)
+163     # push args
+164   68/push  "F - test_compare_kernel_string_with_equal_array"/imm32
+165   68/push  1/imm32/true
+166   50/push-EAX
+167     # call
+168   e8/call  check_ints_equal/disp32
+169     # discard args
+170   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
+171   c3/return
+172 
+173 test_compare_kernel_string_with_inequal_array:  # same length, one byte differs -> expect false
+174   # EAX = kernel_string_equal(Abc_kernel_string, "Adc")
+175     # push args
+176   68/push  "Adc"/imm32
+177   68/push  Abc_kernel_string/imm32
+178     # call
+179   e8/call  kernel_string_equal/disp32
+180     # discard args
+181   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+182   # call check_ints_equal(EAX, 0, msg)
+183     # push args
+184   68/push  "F - test_compare_kernel_string_with_inequal_array"/imm32
+185   68/push  0/imm32/false
+186   50/push-EAX
+187     # call
+188   e8/call  check_ints_equal/disp32
+189     # discard args
+190   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
+191   c3/return
+192 
+193 test_compare_kernel_string_with_empty_array:  # non-empty kernel string vs "" -> expect false
+194   # EAX = kernel_string_equal(Abc_kernel_string, "")
+195     # push args
+196   68/push  ""/imm32
+197   68/push  Abc_kernel_string/imm32
+198     # call
+199   e8/call  kernel_string_equal/disp32
+200     # discard args
+201   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+202   # call check_ints_equal(EAX, 0, msg)
+203     # push args
+204   68/push  "F - test_compare_kernel_string_with_empty_array"/imm32
+205   68/push  0/imm32/false
+206   50/push-EAX
+207     # call
+208   e8/call  check_ints_equal/disp32
+209     # discard args
+210   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
+211   c3/return
+212 
+213 test_compare_kernel_string_with_shorter_array:
+214   # EAX = kernel_string_equal(Abc_kernel_string, "Ab")
+215     # push args
+216   68/push  "Ab"/imm32
+217   68/push  Abc_kernel_string/imm32
+218     # call
+219   e8/call  kernel_string_equal/disp32
+220     # discard args
+221   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+222   # call check_ints_equal(EAX, 0, msg)
+223     # push args
+224   68/push  "F - test_compare_kernel_string_with_shorter_array"/imm32
+225   68/push  0/imm32/false
+226   50/push-EAX
+227     # call
+228   e8/call  check_ints_equal/disp32
+229     # discard args
+230   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
+231   c3/return
+232 
+233 test_compare_kernel_string_with_longer_array:
+234   # EAX = kernel_string_equal(Abc_kernel_string, "Abcd")
+235     # push args
+236   68/push  "Abcd"/imm32
+237   68/push  Abc_kernel_string/imm32
+238     # call
+239   e8/call  kernel_string_equal/disp32
+240     # discard args
+241   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+242   # call check_ints_equal(EAX, 0, msg)
+243     # push args
+244   68/push  "F - test_compare_kernel_string_with_longer_array"/imm32
+245   68/push  0/imm32/false
+246   50/push-EAX
+247     # call
+248   e8/call  check_ints_equal/disp32
+249     # discard args
+250   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
+251   c3/return
+252 
+253 == data
+254 
+255 Null_kernel_string:
+256   00/null
+257 Abc_kernel_string:
+258   41/A 62/b 63/c 00/null
+259 
+260 # vim:ft=subx:nowrap:tw&
+
+ + + diff --git a/html/subx/apps/crenshaw2-1.subx.html b/html/subx/apps/crenshaw2-1.subx.html index c01985bb..8ac601ec 100644 --- a/html/subx/apps/crenshaw2-1.subx.html +++ b/html/subx/apps/crenshaw2-1.subx.html @@ -17,7 +17,6 @@ a:hover { text-decoration: underline; } * { font-size: 12pt; font-size: 1em; } .LineNr { color: #444444; } .Delimiter { color: #800080; } -.Folded { color: #a8a8a8; background-color: #080808; padding-bottom: 1px; } .Comment { color: #9090ff; } .Comment a { color:#0000ee; text-decoration:underline; } .SalientComment { color: #00ffff; } @@ -58,8 +57,8 @@ if ('onhashchange' in window) { 1 ## port of https://github.com/akkartik/crenshaw/blob/master/tutor2.1.pas 2 # corresponds to the section "single digits" in https://compilers.iecc.com/crenshaw/tutor2.txt 3 # - 4 # To run: - 5 # $ subx translate apps/crenshaw2.1.subx crenshaw 2.1 + 4 # To run (from the subx directory): + 5 # $ subx translate *.subx apps/crenshaw2.1.subx -o crenshaw 2.1 6 # $ echo '3' |subx run apps/crenshaw2.1 |xxd - 7 # Expected output: 8 # TODO @@ -82,13 +81,13 @@ if ('onhashchange' in window) { 25 # call 26 e8/call abort/disp32 27 # discard arg - 28 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP + 28 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP 29 # exit(0) - 30 bb/copy . . . . . . . 0/imm32 # copy 0 to EBX - 31 b8/copy . . . . . . . 1/imm32/exit # copy 1 to EAX + 30 bb/copy . . . . . . . 0/imm32 # copy to EBX + 31 b8/copy . . . . . . . 1/imm32/exit # copy to EAX 32 cd/syscall 0x80/imm8 33 - 34 ## compiler helpers + 34 ## helpers 35 36 # print error message and exit 37 # really maps to the 'Expected' function in Crenshaw @@ -99,10 +98,10 @@ if ('onhashchange' in window) { 42 # call 43 e8/call error/disp32 44 # discard arg - 45 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP + 45 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP 46 # exit(1) - 47 bb/copy . . . . . . . 
1/imm32 # copy 1 to EBX - 48 b8/copy . . . . . . . 1/imm32/exit # copy 1 to EAX + 47 bb/copy . . . . . . . 1/imm32 # copy to EBX + 48 b8/copy . . . . . . . 1/imm32/exit # copy to EAX 49 cd/syscall 0x80/imm8 50 51 # print out "Error: #{s} expected\n" to stderr @@ -113,143 +112,63 @@ if ('onhashchange' in window) { 56 # call 57 e8/call write_stderr/disp32 58 # discard arg - 59 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP + 59 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP 60 # write_stderr(s) 61 # push args 62 ff 6/subop/push 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . . 4/disp8 . # push *(ESP+4) 63 # call 64 e8/call write_stderr/disp32 65 # discard arg - 66 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP + 66 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP 67 # write_stderr(" expected") 68 # push args 69 68/push " expected"/imm32 70 # call 71 e8/call write_stderr/disp32 72 # discard arg - 73 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP + 73 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP 74 # write_stderr("\n") 75 # push args 76 68/push Newline/imm32 77 # call 78 e8/call write_stderr/disp32 79 # discard arg - 80 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP + 80 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP 81 # end 82 c3/return 83 - 84 ## helpers - 85 - 86 # print msg to stderr if a != b, otherwise print "." - 87 check_ints_equal: # (a : int, b : int, msg : (address array byte)) -> boolean - 88 # load args into EAX, EBX and ECX - 89 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . 0/r32/EAX 0xc/disp8 . # copy *(ESP+12) to EAX - 90 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . 3/r32/EBX 0x8/disp8 . # copy *(ESP+8) to EBX - 91 # if EAX == b/EBX - 92 39/compare 3/mod/direct 0/rm32/EAX . . . 3/r32/EBX . . 
# compare EAX and EBX - 93 75/jump-if-unequal $check_ints_equal:else/disp8 - 94 # print('.') - 95 # push args - 96 68/push "."/imm32 - 97 # call - 98 e8/call write_stderr/disp32 - 99 # discard arg -100 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP -101 # return -102 c3/return -103 # else: -104 $check_ints_equal:else: -105 # copy msg into ECX -106 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . 1/r32/ECX 4/disp8 . # copy *(ESP+4) to ECX -107 # print(ECX) -108 # push args -109 51/push-ECX -110 # call -111 e8/call write_stderr/disp32 -112 # discard arg -113 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP -114 # print newline -115 # push args -116 68/push Newline/imm32 -117 # call -118 e8/call write_stderr/disp32 -119 # discard arg -120 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP -121 # end -122 c3/return -123 -124 # compare a null-terminated ascii string with a more idiomatic length-prefixed byte array -125 # reason for the name: the only place we should have null-terminated ascii strings is from commandline args -126 argv_equal: # s : null-terminated ascii string, benchmark : length-prefixed ascii string -> EAX : boolean -127 +-- 58 lines: # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -185 +--134 lines: # tests for argv_equal ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- -319 -320 write_stderr: # s : (address array byte) -> <void> -321 # save registers -322 50/push-EAX -323 51/push-ECX -324 52/push-EDX -325 53/push-EBX -326 # write(2/stderr, (data) s+4, (size) *s) -327 # fd = 2 (stderr) -328 bb/copy . . . . . . . 2/imm32 # copy 2 to EBX -329 # x = s+4 -330 8b/copy 1/mod/*+disp8 4/rm32/SIB 4/base/ESP 4/index/none . 
1/r32/ECX 0x14/disp8 . # copy *(ESP+20) to ECX -331 81 0/subop/add 3/mod/direct 1/rm32/ECX . . . . . 4/imm32 # add 4 to ECX -332 # size = *s -333 8b/copy 1/mod/*+disp8 4/rm32/SIB 4/base/ESP 4/index/none . 2/r32/EDX 0x14/disp8 . # copy *(ESP+20) to EDX -334 8b/copy 0/mod/indirect 2/rm32/EDX . . . 2/r32/EDX . . # copy *EDX to EDX -335 # call write() -336 b8/copy . . . . . . . 4/imm32/write # copy 1 to EAX -337 cd/syscall 0x80/imm8 -338 # restore registers -339 5b/pop-EBX -340 5a/pop-EDX -341 59/pop-ECX -342 58/pop-EAX -343 # end -344 c3/return -345 -346 write_stdout: # s : (address array byte) -> <void> -347 # save registers -348 50/push-EAX -349 51/push-ECX -350 52/push-EDX -351 53/push-EBX -352 # write(1/stdout, (data) s+4, (size) *s) -353 # fd = 1 (stdout) -354 bb/copy . . . . . . . 1/imm32 # copy 1 to EBX -355 # x = s+4 -356 8b/copy 1/mod/*+disp8 4/rm32/SIB 4/base/ESP 4/index/none . 1/r32/ECX 0x14/disp8 . # copy *(ESP+20) to ECX -357 81 0/subop/add 3/mod/direct 1/rm32/ECX . . . . . 4/imm32 # add 4 to ECX -358 # size = *s -359 8b/copy 1/mod/*+disp8 4/rm32/SIB 4/base/ESP 4/index/none . 2/r32/EDX 0x14/disp8 . # copy *(ESP+20) to EDX -360 8b/copy 0/mod/indirect 2/rm32/EDX . . . 2/r32/EDX . . # copy *EDX to EDX -361 # call write() -362 b8/copy . . . . . . . 4/imm32/write # copy 1 to EAX -363 cd/syscall 0x80/imm8 -364 # restore registers -365 5b/pop-EBX -366 5a/pop-EDX -367 59/pop-ECX -368 58/pop-EAX -369 # end -370 c3/return -371 -372 == data -373 Newline: -374 # size -375 01 00 00 00 -376 # data -377 0a/newline -378 -379 # for argv_equal tests -380 Null_argv: -381 00/null -382 Abc_argv: -383 41/A 62/b 63/c 00/null -384 -385 # vim:ft=subx:nowrap:so=0 + 84 write_stdout: # s : (address array byte) -> <void> + 85 # prolog + 86 55/push-EBP + 87 89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . 
# copy ESP to EBP + 88 # save registers + 89 50/push-EAX + 90 51/push-ECX + 91 52/push-EDX + 92 53/push-EBX + 93 # write(1/stdout, (data) s+4, (size) *s) + 94 # fd = 1 (stdout) + 95 bb/copy . . . . . . . 1/imm32 # copy to EBX + 96 # x = s+4 + 97 8b/copy 1/mod/*+disp8 4/rm32/SIB 5/base/EBP 4/index/none . 1/r32/ECX 8/disp8 . # copy *(EBP+8) to ECX + 98 81 0/subop/add 3/mod/direct 1/rm32/ECX . . . . . 4/imm32 # add to ECX + 99 # size = *s +100 8b/copy 1/mod/*+disp8 4/rm32/SIB 5/base/EBP 4/index/none . 2/r32/EDX 8/disp8 . # copy *(EBP+8) to EDX +101 8b/copy 0/mod/indirect 2/rm32/EDX . . . 2/r32/EDX . . # copy *EDX to EDX +102 # call write() +103 b8/copy . . . . . . . 4/imm32/write # copy to EAX +104 cd/syscall 0x80/imm8 +105 # restore registers +106 5b/pop-to-EBX +107 5a/pop-to-EDX +108 59/pop-to-ECX +109 58/pop-to-EAX +110 # end +111 89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP +112 5d/pop-to-EBP +113 c3/return +114 +115 # vim:ft=subx:nowrap:so=0 diff --git a/html/subx/apps/factorial.subx.html b/html/subx/apps/factorial.subx.html index 4e11b12f..cbb66b9e 100644 --- a/html/subx/apps/factorial.subx.html +++ b/html/subx/apps/factorial.subx.html @@ -17,7 +17,6 @@ a:hover { text-decoration: underline; } * { font-size: 12pt; font-size: 1em; } .LineNr { color: #444444; } .Delimiter { color: #800080; } -.Folded { color: #a8a8a8; background-color: #080808; padding-bottom: 1px; } .Comment { color: #9090ff; } .Comment a { color:#0000ee; text-decoration:underline; } .SalientComment { color: #00ffff; } @@ -57,8 +56,8 @@ if ('onhashchange' in window) {
   1 ## compute the factorial of 5, and return the result in the exit code
   2 #
-  3 # To run:
-  4 #   $ subx translate apps/factorial.subx apps/factorial
+  3 # To run (from the subx directory):
+  4 #   $ subx translate apps/factorial.subx -o apps/factorial
   5 #   $ subx run apps/factorial
   6 # Expected result:
   7 #   $ echo $?
@@ -77,21 +76,21 @@ if ('onhashchange' in window) {
  20 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
  21 
  22 # main:
- 23   # if (argc > 1)
- 24   8b/copy                         0/mod/indirect  4/rm32/sib    4/base/ESP  4/index/none  .           0/r32/EAX   .               .                 # copy *ESP to EAX
- 25   3d/compare                      .               .             .           .             .           .           .               1/imm32           # compare EAX with 1
- 26   7e/jump-if-lesser-or-equal  $run_main/disp8
- 27   # and if (argv[1] == "test")
- 28   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           0/r32/EAX   8/disp8         .                 # copy *(ESP+8) to EAX
+ 23   # prolog
+ 24   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+ 25   # if (argc > 1)
+ 26   81          7/subop/compare     1/mod/*+disp8   4/rm32/SIB    5/base/EBP  4/index/none  .           .           0/disp8         1/imm32           # compare *EBP
+ 27   7e/jump-if-lesser-or-equal  $run_main/disp8
+ 28   # and if (argv[1] == "test")
  29     # push args
- 30   50/push-EAX
- 31   68/push  "test"/imm32
+ 30   68/push  "test"/imm32
+ 31   ff          6/subop/push        1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           .           0x8/disp8       .                 # push *(EBP+8)
  32     # call
- 33   e8/call  argv_equal/disp32
+ 33   e8/call  kernel_string_equal/disp32
  34     # discard args
- 35   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add 8 to ESP
+ 35   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
  36     # check result
- 37   3d/compare                      .               .             .           .             .           .           .               1/imm32           # compare EAX with 1
+ 37   3d/compare-EAX  1/imm32
  38   75/jump-if-not-equal  $run_main/disp8
  39   # then
  40   e8/call  run_tests/disp32
@@ -103,40 +102,40 @@ if ('onhashchange' in window) {
  46     # EAX <- call
  47   e8/call  factorial/disp32
  48     # discard arg
- 49   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add 4 to ESP
+ 49   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
  50 $main_exit:
  51   # exit(EAX)
  52   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                 # copy EAX to EBX
- 53   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EAX
+ 53   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EAX
  54   cd/syscall  0x80/imm8
  55 
  56 # factorial(n)
  57 factorial:
- 58   # initialize EAX to 1 (base case)
- 59   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EAX
- 60   # if (n <= 1) jump exit
- 61   81          7/subop/compare     1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           .           4/disp8         1/imm32           # compare *(ESP+4) with 1
- 62   7e/jump-if-<=  $factorial:exit/disp8
- 63   # EBX: n-1
- 64   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              3/r32/EBX   4/disp8         .                 # copy *(ESP+4) to EBX
- 65   81          5/subop/subtract    3/mod/direct    3/rm32/EBX    .           .             .           .           .               1/imm32           # subtract 1 from EBX
- 66   # prepare call
- 67   55/push-EBP
- 68   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
- 69   # EAX: factorial(n-1)
- 70   53/push-EBX
- 71   e8/call                         .               .             .           .             .           .           factorial/disp32
- 72   # discard arg
- 73   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add 4 to ESP
- 74   # clean up after call
- 75   89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
- 76   5d/pop                          .               .             .           .             .           .           .               .                 # pop to EBP
- 77   # refresh n
- 78   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              2/r32/EDX   4/disp8         .                 # copy *(ESP+4) to EDX
- 79   # return n * factorial(n-1)
- 80   f7          4/subop/multiply    1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none                          4/disp8         .                 # multiply *(ESP+4) (n) into EAX (factorial(n-1))
- 81   # TODO: check for overflow
- 82 $factorial:exit:
+ 58   # prolog
+ 59   55/push-EBP
+ 60   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+ 61   53/push-EBX
+ 62   # initialize EAX to 1 (base case)
+ 63   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EAX
+ 64   # if (n <= 1) jump exit
+ 65   81          7/subop/compare     1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           .           8/disp8         1/imm32           # compare *(EBP+8)
+ 66   7e/jump-if-<=  $factorial:exit/disp8
+ 67   # EBX: n-1
+ 68   8b/copy                         1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none              3/r32/EBX   8/disp8         .                 # copy *(EBP+8) to EBX
+ 69   81          5/subop/subtract    3/mod/direct    3/rm32/EBX    .           .             .           .           .               1/imm32           # subtract from EBX
+ 70   # EAX: factorial(n-1)
+ 71   53/push-EBX
+ 72   e8/call                         .               .             .           .             .           .           factorial/disp32
+ 73   # discard arg
+ 74   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
+ 75   # return n * factorial(n-1)
+ 76   f7          4/subop/multiply    1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none                          8/disp8         .                 # multiply *(EBP+8) into EAX
+ 77   # TODO: check for overflow
+ 78 $factorial:exit:
+ 79   # epilog
+ 80   5b/pop-to-EBX
+ 81   89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
+ 82   5d/pop-to-EBP
  83   c3/return
  84 
  85 test_factorial:
@@ -146,118 +145,20 @@ if ('onhashchange' in window) {
  89     # call
  90   e8/call  factorial/disp32
  91     # discard arg
- 92   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add 4 to ESP
+ 92   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
  93   # check_ints_equal(EAX, 120, failure message)
  94     # push args
- 95   50/push-EAX
+ 95   68/push  "F - test_factorial"/imm32
  96   68/push  0x78/imm32/expected-120
- 97   68/push  "F - test_factorial"/imm32
+ 97   50/push-EAX
  98     # call
  99   e8/call  check_ints_equal/disp32
 100     # discard args
-101   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add 12 to ESP
+101   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
 102   # end
 103   c3/return
 104 
-105 ## helpers
-106 
-107 # print msg to stderr if a != b, otherwise print "."
-108 check_ints_equal:  # (a : int, b : int, msg : (address array byte)) -> boolean
-109   # load args into EAX, EBX and ECX
-110   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           0/r32/EAX   0xc/disp8       .                 # copy *(ESP+12) to EAX
-111   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           3/r32/EBX   0x8/disp8       .                 # copy *(ESP+8) to EBX
-112   # if EAX == b/EBX
-113   39/compare                      3/mod/direct    0/rm32/EAX    .           .             .           3/r32/EBX   .               .                 # compare EAX and EBX
-114   75/jump-if-unequal  $check_ints_equal:else/disp8
-115     # print('.')
-116       # push args
-117   68/push  "."/imm32
-118       # call
-119   e8/call  write_stderr/disp32
-120       # discard arg
-121   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add 4 to ESP
-122     # return
-123   c3/return
-124   # else:
-125 $check_ints_equal:else:
-126   # copy msg into ECX
-127   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           1/r32/ECX   4/disp8         .                 # copy *(ESP+4) to ECX
-128     # print(ECX)
-129       # push args
-130   51/push-ECX
-131       # call
-132   e8/call  write_stderr/disp32
-133       # discard arg
-134   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add 4 to ESP
-135     # print newline
-136       # push args
-137   68/push  Newline/imm32
-138       # call
-139   e8/call  write_stderr/disp32
-140       # discard arg
-141   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add 4 to ESP
-142   # end
-143   c3/return
-144 
-145 # compare a null-terminated ascii string with a more idiomatic length-prefixed byte array
-146 # reason for the name: the only place we should have null-terminated ascii strings is from commandline args
-147 argv_equal:  # s : null-terminated ascii string, benchmark : length-prefixed ascii string -> EAX : boolean
-148   # pseudocode:
-149   #   initialize n = b.length
-150   #   initialize s1 = s
-151   #   initialize s2 = b.data
-152   #   i = 0
-153   #   for (i = 0; i < n; ++n)
-154   #     c1 = *s1
-155   #     c2 = *s2
-156   #     if c1 == 0
-157   #       return false
-158   #     if c1 != c2
-159   #       return false
-160   #   return *s1 == 0
-161 +-- 45 lines: # --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-206 +--134 lines: # tests for argv_equal -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
-340 
-341 write_stderr:  # s : (address array byte) -> <void>
-342   # save registers
-343   50/push-EAX
-344   51/push-ECX
-345   52/push-EDX
-346   53/push-EBX
-347   # write(2/stderr, (data) s+4, (size) *s)
-348     # fd = 2 (stderr)
-349   bb/copy                         .               .             .           .             .           .           .               2/imm32           # copy 2 to EBX
-350     # x = s+4
-351   8b/copy                         1/mod/*+disp8   4/rm32/SIB    4/base/ESP  4/index/none  .           1/r32/ECX   0x14/disp8      .                 # copy *(ESP+20) to ECX
-352   81          0/subop/add         3/mod/direct    1/rm32/ECX    .           .             .           .           .               4/imm32           # add 4 to ECX
-353     # size = *s
-354   8b/copy                         1/mod/*+disp8   4/rm32/SIB    4/base/ESP  4/index/none  .           2/r32/EDX   0x14/disp8      .                 # copy *(ESP+20) to EDX
-355   8b/copy                         0/mod/indirect  2/rm32/EDX    .           .             .           2/r32/EDX   .               .                 # copy *EDX to EDX
-356     # call write()
-357   b8/copy                         .               .             .           .             .           .           .               4/imm32/write     # copy 1 to EAX
-358   cd/syscall  0x80/imm8
-359   # restore registers
-360   5b/pop-EBX
-361   5a/pop-EDX
-362   59/pop-ECX
-363   58/pop-EAX
-364   # end
-365   c3/return
-366 
-367 == data
-368 Newline:
-369   # size
-370   01 00 00 00
-371   # data
-372   0a/newline
-373 
-374 # for argv_equal tests
-375 Null_argv:
-376   00/null
-377 Abc_argv:
-378   41/A 62/b 63/c 00/null
-379 
-380 # vim:ft=subx:nowrap:so=0
+105 # vim:ft=subx:nowrap:so=0
 
diff --git a/html/subx/examples/ex1.1.subx.html b/html/subx/examples/ex1.1.subx.html deleted file mode 100644 index f706d6fd..00000000 --- a/html/subx/examples/ex1.1.subx.html +++ /dev/null @@ -1,79 +0,0 @@ - - - - -Mu - subx/examples/ex1.1.subx - - - - - - - - - - -
- 1 ## first program: same as https://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
- 2 # Just return 42.
- 3 #
- 4 # To run:
- 5 #   $ subx translate ex1.1.subx ex1
- 6 #   $ subx run ex1
- 7 # Expected result:
- 8 #   $ echo $?
- 9 #   42
-10 
-11 == code
-12 # opcode        ModR/M                    SIB                   displacement    immediate
-13 # instruction   mod, reg, Reg/Mem bits    scale, index, base
-14 # 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
-15   bb                                                                            2a 00 00 00       # copy 0x2a (42) to EBX
-16   # exit(EBX)
-17   b8                                                                            01 00 00 00       # copy 1 to EAX
-18   cd                                                                            80                # int 80h
-19 
-20 # vim:ft=subx
-
- - - diff --git a/html/subx/examples/ex1.2.subx.html b/html/subx/examples/ex1.2.subx.html deleted file mode 100644 index 0e7ec547..00000000 --- a/html/subx/examples/ex1.2.subx.html +++ /dev/null @@ -1,79 +0,0 @@ - - - - -Mu - subx/examples/ex1.2.subx - - - - - - - - - - -
- 1 ## first program: same as https://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
- 2 # Just return 42.
- 3 #
- 4 # To run:
- 5 #   $ subx translate ex1.2.subx ex1
- 6 #   $ subx run ex1
- 7 # Expected result:
- 8 #   $ echo $?
- 9 #   42
-10 
-11 == code
-12 # instruction                     effective address                                                   operand     displacement    immediate
-13 # op          subop               mod             rm32          base        index         scale       r32
-14 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-15   bb/copy                                                                                                                         2a/imm32          # copy 42 to EBX
-16   # exit(EBX)
-17   b8/copy                                                                                                                         1/imm32           # copy 1 to EAX
-18   cd/syscall  0x80/imm8
-19 
-20 # vim:ft=subx
-
- - - diff --git a/html/subx/examples/ex1.subx.html b/html/subx/examples/ex1.subx.html new file mode 100644 index 00000000..4ed9359e --- /dev/null +++ b/html/subx/examples/ex1.subx.html @@ -0,0 +1,79 @@ + + + + +Mu - subx/examples/ex1.subx + + + + + + + + + + +
+ 1 ## first program: same as https://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
+ 2 # Just return 42.
+ 3 #
+ 4 # To run (from the subx directory):
+ 5 #   $ subx translate examples/ex1.subx -o examples/ex1
+ 6 #   $ subx run examples/ex1
+ 7 # Expected result:
+ 8 #   $ echo $?
+ 9 #   42
+10 
+11 == code
+12 # instruction                     effective address                                                   operand     displacement    immediate
+13 # op          subop               mod             rm32          base        index         scale       r32
+14 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+15   bb/copy                                                                                                                         2a/imm32          # copy 42 to EBX
+16   # exit(EBX)
+17   b8/copy                                                                                                                         1/imm32           # copy to EAX
+18   cd/syscall  0x80/imm8
+19 
+20 # vim:ft=subx
+
+ + + diff --git a/html/subx/examples/ex10.subx.html b/html/subx/examples/ex10.subx.html index 8e7a698e..074cd0e8 100644 --- a/html/subx/examples/ex10.subx.html +++ b/html/subx/examples/ex10.subx.html @@ -56,9 +56,9 @@ if ('onhashchange' in window) {
  1 ## String comparison: return 1 iff the two args passed in at the commandline are equal.
  2 #
- 3 # To run:
- 4 #   $ subx translate ex10.subx ex10
- 5 #   $ subx run ex10 abc abd
+ 3 # To run (from the subx directory):
+ 4 #   $ subx translate examples/ex10.subx -o examples/ex10
+ 5 #   $ subx run examples/ex10 abc abd
  6 # Expected result:
  7 #   $ echo $?
  8 #   0  # false
@@ -74,56 +74,55 @@ if ('onhashchange' in window) {
 18 #         argv[0]: *(ESP+4)
 19 #         argv[1]: *(ESP+8)
 20 #         ...
-21   # s1 = argv[1] (EAX)
-22   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           0/r32/EAX   8/disp8         .                 # copy *(ESP+8) to EAX
-23   # s2 = argv[2] (EBX)
-24   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           3/r32/EBX   0xc/disp8       .                 # copy *(ESP+12) to EBX
-25   # call argv_equal(s1, s2)
-26     # push args
-27   50/push-EAX
-28   53/push-EBX
-29     # call
-30   e8/call argv_equal/disp32
-31   # exit(EAX)
-32 $exit:
-33   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                 # copy EAX to EBX
-34   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EAX
-35   cd/syscall  0x80/imm8
-36 
-37 # compare two null-terminated ascii strings
-38 # reason for the name: the only place we should have null-terminated ascii strings is from commandline args
-39 argv_equal:  # (s1, s2) : null-terminated ascii strings -> EAX : boolean
-40   # initialize s1 (ECX) and s2 (EDX)
-41   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           1/r32/ECX   8/disp8         .                 # copy *(ESP+8) to ECX
-42   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           2/r32/EDX   4/disp8         .                 # copy *(ESP+4) to EDX
-43   # while (true)
-44 $argv_loop:
-45     # c1/EAX, c2/EBX = *s1, *s2
-46   b8/copy  0/imm32  # clear EAX
-47   8a/copy                         0/mod/indirect  1/rm32/ECX    .           .             .           0/r32/EAX   .               .                 # copy byte at *ECX to lower byte of EAX
-48   bb/copy  0/imm32  # clear EBX
-49   8a/copy                         0/mod/indirect  2/rm32/EDX    .           .             .           3/r32/EBX   .               .                 # copy byte at *EDX to lower byte of EBX
-50     # if (c1 == 0) break
-51   3d/compare                      .               .             .           .             .           .           .               0/imm32           # compare EAX with 0
-52   74/jump-if-equal  $argv_break/disp8
-53     # if (c1 != c2) return false
-54   39/compare                      3/mod/direct    0/rm32/EAX    .           .             .           3/r32/EBX   .               .                 # compare EAX with EBX
-55   75/jump-if-not-equal  $argv_fail/disp8
-56     # ++s1, ++s2
-57   41/inc-ECX
-58   42/inc-EDX
-59   # end while
-60   eb/jump  $argv_loop/disp8
-61 $argv_break:
-62   # if (c2 == 0) return true
-63   81          7/subop/compare     3/mod/direct    3/rm32/EBX    .           .             .           .           .               0/imm32           # compare EBX with 0
-64   75/jump-if-not-equal  $argv_fail/disp8
-65   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EAX
-66   c3/return
-67   # return false
-68 $argv_fail:
-69   b8/copy                         .               .             .           .             .           .           .               0/imm32           # copy 0 to EAX
-70   c3/return
+21   # prolog
+22   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+23   # call argv_equal(argv[1], argv[2])
+24     # push argv[2]
+25   ff          6/subop/push        1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           .           0xc/disp8       .                 # push *(EBP+12)
+26     # push argv[1]
+27   ff          6/subop/push        1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           .           0x8/disp8       .                 # push *(EBP+8)
+28     # call
+29   e8/call argv_equal/disp32
+30   # exit(EAX)
+31 $exit:
+32   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                 # copy EAX to EBX
+33   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EAX
+34   cd/syscall  0x80/imm8
+35 
+36 # compare two null-terminated ascii strings
+37 # reason for the name: the only place we should have null-terminated ascii strings is from commandline args
+38 argv_equal:  # (s1, s2) : null-terminated ascii strings -> EAX : boolean
+39   # initialize s1 (ECX) and s2 (EDX)
+40   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           1/r32/ECX   4/disp8         .                 # copy *(ESP+4) to ECX
+41   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           2/r32/EDX   8/disp8         .                 # copy *(ESP+8) to EDX
+42   # while (true)
+43 $argv_loop:
+44     # c1/EAX, c2/EBX = *s1, *s2
+45   b8/copy  0/imm32  # clear EAX
+46   8a/copy                         0/mod/indirect  1/rm32/ECX    .           .             .           0/r32/EAX   .               .                 # copy byte at *ECX to lower byte of EAX
+47   bb/copy  0/imm32  # clear EBX
+48   8a/copy                         0/mod/indirect  2/rm32/EDX    .           .             .           3/r32/EBX   .               .                 # copy byte at *EDX to lower byte of EBX
+49     # if (c1 == 0) break
+50   3d/compare-EAX  0/imm32
+51   74/jump-if-equal  $argv_break/disp8
+52     # if (c1 != c2) return false
+53   39/compare                      3/mod/direct    0/rm32/EAX    .           .             .           3/r32/EBX   .               .                 # compare EAX with EBX
+54   75/jump-if-not-equal  $argv_fail/disp8
+55     # ++s1, ++s2
+56   41/inc-ECX
+57   42/inc-EDX
+58   # end while
+59   eb/jump  $argv_loop/disp8
+60 $argv_break:
+61   # if (c2 == 0) return true
+62   81          7/subop/compare     3/mod/direct    3/rm32/EBX    .           .             .           .           .               0/imm32           # compare EBX
+63   75/jump-if-not-equal  $argv_fail/disp8
+64   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EAX
+65   c3/return
+66   # return false
+67 $argv_fail:
+68   b8/copy                         .               .             .           .             .           .           .               0/imm32           # copy to EAX
+69   c3/return
 
diff --git a/html/subx/examples/ex11.subx.html b/html/subx/examples/ex11.subx.html index f0fd07c2..3273b21a 100644 --- a/html/subx/examples/ex11.subx.html +++ b/html/subx/examples/ex11.subx.html @@ -57,13 +57,13 @@ if ('onhashchange' in window) { 1 ## Null-terminated vs length-prefixed ascii strings. 2 # 3 # By default we create strings with a 4-byte length prefix rather than a null suffix. - 4 # However, commandline arguments come null-prefixed from the Linux kernel. - 5 # This example shows a helper that can compare a commandline argument with the - 6 # (length-prefixed) literal string "target". + 4 # However we still need null-prefixed strings when interacting with the Linux + 5 # kernel in a few places. This layer implements a function for comparing + 6 # a null-terminated 'kernel string' with a length-prefixed 'SubX string'. 7 # - 8 # To run: - 9 # $ subx translate ex11.subx ex11 - 10 # $ subx run ex11 # runs a series of tests + 8 # To run (from the subx directory): + 9 # $ subx translate examples/ex11.subx -o examples/ex11 + 10 # $ subx run examples/ex11 # runs a series of tests 11 # ...... # all tests pass 12 # 13 # (We can't yet run the tests when given a "test" commandline argument, @@ -71,293 +71,338 @@ if ('onhashchange' in window) { 15 # would cause tests to not run, rather than to fail as we'd like.) 16 17 == code - 18 # instruction effective address operand displacement immediate - 19 # op subop mod rm32 base index scale r32 - 20 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes - 21 - 22 # main: - 23 e8/call run_tests/disp32 # 'run_tests' is a function created automatically by SubX. It calls all functions that start with 'test_'. - 24 # exit(EAX) - 25 89/copy 3/mod/direct 3/rm32/EBX . . . 0/r32/EAX . . # copy EAX to EBX - 26 b8/copy . . . . . . . 
1/imm32 # copy 1 to EAX - 27 cd/syscall 0x80/imm8 - 28 - 29 # compare a null-terminated ascii string with a more idiomatic length-prefixed byte array - 30 # reason for the name: the only place we should have null-terminated ascii strings is from commandline args - 31 argv_equal: # s : null-terminated ascii string, benchmark : length-prefixed ascii string -> EAX : boolean - 32 # pseudocode: - 33 # initialize n = b.length - 34 # initialize s1 = s - 35 # initialize s2 = b.data - 36 # i = 0 - 37 # for (i = 0; i < n; ++n) - 38 # c1 = *s1 - 39 # c2 = *s2 - 40 # if c1 == 0 - 41 # return false - 42 # if c1 != c2 - 43 # return false - 44 # return *s1 == 0 - 45 - 46 # initialize s into EDI - 47 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . 7/r32/EDI 8/disp8 . # copy *(ESP+8) to EDI - 48 # initialize benchmark length n into EDX - 49 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . 2/r32/EDX 4/disp8 . # copy *(ESP+4) to EDX - 50 8b/copy 0/mod/indirect 2/rm32/EDX . . . 2/r32/EDX . . # copy *EDX to EDX - 51 # initialize benchmark data into ESI - 52 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . 6/r32/ESI 4/disp8 . # copy *(ESP+4) to ESI - 53 81 0/subop/add 3/mod/direct 6/rm32/ESI . . . . . 4/imm32 # add 4 to ESI - 54 # initialize loop counter i into ECX - 55 b9/copy . . . . . . . 0/imm32/exit # copy 1 to ECX - 56 # while (i/ECX < n/EDX) - 57 $argv_loop: - 58 39/compare 3/mod/direct 1/rm32/ECX . . . 2/r32/EDX . . # compare ECX with EDX - 59 74/jump-if-equal $argv_break/disp8 - 60 # c1/EAX, c2/EBX = *s, *benchmark - 61 b8/copy 0/imm32 # clear EAX - 62 8a/copy 0/mod/indirect 7/rm32/EDI . . . 0/r32/EAX . . # copy byte at *EDI to lower byte of EAX - 63 bb/copy 0/imm32 # clear EBX - 64 8a/copy 0/mod/indirect 6/rm32/ESI . . . 3/r32/EBX . . # copy byte at *ESI to lower byte of EBX - 65 # if (c1 == 0) return false - 66 3d/compare . . . . . . . 
0/imm32 # compare EAX with 0 - 67 74/jump-if-equal $argv_fail/disp8 - 68 # if (c1 != c2) return false - 69 39/compare 3/mod/direct 0/rm32/EAX . . . 3/r32/EBX . . # compare EAX with EBX - 70 75/jump-if-not-equal $argv_fail/disp8 - 71 # ++s1, ++s2, ++i - 72 41/inc-ECX - 73 46/inc-ESI - 74 47/inc-EDI - 75 # end while - 76 eb/jump $argv_loop/disp8 - 77 $argv_break: - 78 # if (*s/EDI == 0) return true - 79 b8/copy 0/imm32 # clear EAX - 80 8a/copy 0/mod/indirect 7/rm32/EDI . . . 0/r32/EAX . . # copy byte at *EDI to lower byte of EAX - 81 81 7/subop/compare 3/mod/direct 0/rm32/EAX . . . . . 0/imm32 # compare EAX with 0 - 82 75/jump-if-not-equal $argv_fail/disp8 - 83 b8/copy . . . . . . . 1/imm32 # copy 1 to EAX - 84 c3/return - 85 # return false - 86 $argv_fail: - 87 b8/copy . . . . . . . 0/imm32 # copy 0 to EAX - 88 c3/return - 89 - 90 ## tests - 91 - 92 test_compare_null_argv_with_empty_array: - 93 # EAX = argv_equal(Null_argv, "") - 94 # push args - 95 68/push Null_argv/imm32 - 96 68/push ""/imm32 - 97 # call - 98 e8/call argv_equal/disp32 - 99 # discard args -100 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add 8 to ESP -101 # call check_ints_equal(EAX, 1) -102 50/push-EAX -103 68/push 1/imm32/true -104 68/push "F - test_compare_null_argv_with_empty_array"/imm32 -105 # call -106 e8/call check_ints_equal/disp32 -107 # discard args -108 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add 12 to ESP + 18 + 19 # instruction effective address operand displacement immediate + 20 # op subop mod rm32 base index scale r32 + 21 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes + 22 + 23 # main: + 24 e8/call run_tests/disp32 # 'run_tests' is a function created automatically by SubX. It calls all functions that start with 'test_'. + 25 # exit(EAX) + 26 89/copy 3/mod/direct 3/rm32/EBX . . . 0/r32/EAX . . # copy EAX to EBX + 27 b8/copy . . . . . . . 
1/imm32 # copy to EAX + 28 cd/syscall 0x80/imm8 + 29 + 30 # compare a null-terminated ascii string with a more idiomatic length-prefixed byte array + 31 # reason for the name: the only place we should have null-terminated ascii strings is from commandline args + 32 kernel_string_equal: # s : null-terminated ascii string, benchmark : length-prefixed ascii string -> EAX : boolean + 33 # prolog + 34 55/push-EBP + 35 89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP + 36 # save registers + 37 51/push-ECX + 38 52/push-EDX + 39 53/push-EBX + 40 56/push-ESI + 41 57/push-EDI + 42 + 43 # pseudocode: + 44 # initialize n = b.length + 45 # initialize s1 = s + 46 # initialize s2 = b.data + 47 # i = 0 + 48 # for (i = 0; i < n; ++n) + 49 # c1 = *s1 + 50 # c2 = *s2 + 51 # if c1 == 0 + 52 # return false + 53 # if c1 != c2 + 54 # return false + 55 # return *s1 == 0 + 56 # initialize s into EDI + 57 8b/copy 1/mod/*+disp8 4/rm32/sib 5/base/EBP 4/index/none . 7/r32/EDI 8/disp8 . # copy *(EBP+8) to EDI + 58 # initialize benchmark length n into EDX + 59 8b/copy 1/mod/*+disp8 4/rm32/sib 5/base/EBP 4/index/none . 2/r32/EDX 0xc/disp8 . # copy *(EBP+12) to EDX + 60 8b/copy 0/mod/indirect 2/rm32/EDX . . . 2/r32/EDX . . # copy *EDX to EDX + 61 # initialize benchmark data into ESI + 62 8b/copy 1/mod/*+disp8 4/rm32/sib 5/base/EBP 4/index/none . 6/r32/ESI 0xc/disp8 . # copy *(EBP+12) to ESI + 63 81 0/subop/add 3/mod/direct 6/rm32/ESI . . . . . 4/imm32 # add to ESI + 64 # initialize loop counter i into ECX + 65 b9/copy . . . . . . . 0/imm32/exit # copy to ECX + 66 # while (i/ECX < n/EDX) + 67 $kernel_string_loop: + 68 39/compare 3/mod/direct 1/rm32/ECX . . . 2/r32/EDX . . # compare ECX with EDX + 69 74/jump-if-equal $kernel_string_break/disp8 + 70 # c1/EAX, c2/EBX = *s, *benchmark + 71 b8/copy 0/imm32 # clear EAX + 72 8a/copy 0/mod/indirect 7/rm32/EDI . . . 0/r32/EAX . . 
# copy byte at *EDI to lower byte of EAX + 73 bb/copy 0/imm32 # clear EBX + 74 8a/copy 0/mod/indirect 6/rm32/ESI . . . 3/r32/EBX . . # copy byte at *ESI to lower byte of EBX + 75 # if (c1 == 0) return false + 76 3d/compare-EAX 0/imm32 + 77 74/jump-if-equal $kernel_string_fail/disp8 + 78 # if (c1 != c2) return false + 79 39/compare 3/mod/direct 0/rm32/EAX . . . 3/r32/EBX . . # compare EAX with EBX + 80 75/jump-if-not-equal $kernel_string_fail/disp8 + 81 # ++s1, ++s2, ++i + 82 41/inc-ECX + 83 46/inc-ESI + 84 47/inc-EDI + 85 # end while + 86 eb/jump $kernel_string_loop/disp8 + 87 $kernel_string_break: + 88 # if (*s/EDI == 0) return true + 89 b8/copy 0/imm32 # clear EAX + 90 8a/copy 0/mod/indirect 7/rm32/EDI . . . 0/r32/EAX . . # copy byte at *EDI to lower byte of EAX + 91 81 7/subop/compare 3/mod/direct 0/rm32/EAX . . . . . 0/imm32 # compare EAX + 92 75/jump-if-not-equal $kernel_string_fail/disp8 + 93 b8/copy . . . . . . . 1/imm32 # copy to EAX + 94 eb/jump $kernel_string_end/disp8 + 95 # return false + 96 $kernel_string_fail: + 97 b8/copy . . . . . . . 0/imm32 # copy to EAX + 98 + 99 $kernel_string_end: +100 # restore registers +101 5f/pop-to-EDI +102 5e/pop-to-ESI +103 5b/pop-to-EBX +104 5a/pop-to-EDX +105 59/pop-to-ECX +106 # end +107 89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP +108 5d/pop-to-EBP 109 c3/return 110 -111 test_compare_null_argv_with_non_empty_array: -112 # EAX = argv_equal(Null_argv, "Abc") -113 # push args -114 68/push Null_argv/imm32 -115 68/push "Abc"/imm32 -116 # call -117 e8/call argv_equal/disp32 -118 # discard args -119 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add 8 to ESP -120 # call check_ints_equal(EAX, 0) -121 50/push-EAX -122 68/push 0/imm32/false -123 68/push "F - test_compare_null_argv_with_non_empty_array"/imm32 -124 # call -125 e8/call check_ints_equal/disp32 -126 # discard args -127 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 
0xc/imm32 # add 12 to ESP -128 c3/return -129 -130 test_compare_argv_with_equal_array: -131 # EAX = argv_equal(Abc_argv, "Abc") -132 # push args -133 68/push Abc_argv/imm32 -134 68/push "Abc"/imm32 -135 # call -136 e8/call argv_equal/disp32 -137 # discard args -138 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add 8 to ESP -139 # call check_ints_equal(EAX, 1) -140 50/push-EAX -141 68/push 1/imm32/true -142 68/push "F - test_compare_argv_with_equal_array"/imm32 -143 # call -144 e8/call check_ints_equal/disp32 -145 # discard args -146 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add 12 to ESP -147 c3/return -148 -149 test_compare_argv_with_inequal_array: -150 # EAX = argv_equal(Abc_argv, "Adc") -151 # push args -152 68/push Abc_argv/imm32 -153 68/push "Adc"/imm32 -154 # call -155 e8/call argv_equal/disp32 -156 # discard args -157 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add 8 to ESP -158 # call check_ints_equal(EAX, 0) -159 50/push-EAX -160 68/push 0/imm32/false -161 68/push "F - test_compare_argv_with_equal_array"/imm32 -162 # call -163 e8/call check_ints_equal/disp32 -164 # discard args -165 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add 12 to ESP -166 c3/return -167 -168 test_compare_argv_with_empty_array: -169 # EAX = argv_equal(Abc_argv, "") -170 # push args -171 68/push Abc_argv/imm32 -172 68/push ""/imm32 -173 # call -174 e8/call argv_equal/disp32 -175 # discard args -176 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add 8 to ESP -177 # call check_ints_equal(EAX, 0) -178 50/push-EAX -179 68/push 0/imm32/false -180 68/push "F - test_compare_argv_with_equal_array"/imm32 -181 # call -182 e8/call check_ints_equal/disp32 -183 # discard args -184 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 
0xc/imm32 # add 12 to ESP -185 c3/return -186 -187 test_compare_argv_with_shorter_array: -188 # EAX = argv_equal(Abc_argv, "Ab") -189 # push args -190 68/push Abc_argv/imm32 -191 68/push "Ab"/imm32 -192 # call -193 e8/call argv_equal/disp32 -194 # discard args -195 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add 8 to ESP -196 # call check_ints_equal(EAX, 0) -197 50/push-EAX -198 68/push 0/imm32/false -199 68/push "F - test_compare_argv_with_shorter_array"/imm32 -200 # call -201 e8/call check_ints_equal/disp32 -202 # discard args -203 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add 12 to ESP -204 c3/return -205 -206 test_compare_argv_with_longer_array: -207 # EAX = argv_equal(Abc_argv, "Abcd") -208 # push args -209 68/push Abc_argv/imm32 -210 68/push "Abcd"/imm32 -211 # call -212 e8/call argv_equal/disp32 -213 # discard args -214 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add 8 to ESP -215 # call check_ints_equal(EAX, 0) -216 50/push-EAX -217 68/push 0/imm32/false -218 68/push "F - test_compare_argv_with_longer_array"/imm32 -219 # call -220 e8/call check_ints_equal/disp32 -221 # discard args -222 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add 12 to ESP -223 c3/return -224 -225 ## helpers -226 -227 # print msg to stderr if a != b, otherwise print "." -228 check_ints_equal: # (a : int, b : int, msg : (address array byte)) -> boolean -229 # load args into EAX, EBX and ECX -230 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . 0/r32/EAX 0xc/disp8 . # copy *(ESP+12) to EAX -231 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . 3/r32/EBX 0x8/disp8 . # copy *(ESP+8) to EBX -232 # if EAX == b/EBX -233 39/compare 3/mod/direct 0/rm32/EAX . . . 3/r32/EBX . . 
# compare EAX and EBX -234 75/jump-if-unequal $check_ints_equal:else/disp8 -235 # print('.') -236 # push args -237 68/push "."/imm32 -238 # call -239 e8/call write_stderr/disp32 -240 # discard arg -241 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP -242 # return -243 c3/return -244 # else: -245 $check_ints_equal:else: -246 # copy msg into ECX -247 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . 1/r32/ECX 4/disp8 . # copy *(ESP+4) to ECX -248 # print(ECX) -249 # push args -250 51/push-ECX -251 # call -252 e8/call write_stderr/disp32 -253 # discard arg -254 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP -255 # print newline -256 # push args -257 68/push Newline/imm32 -258 # call -259 e8/call write_stderr/disp32 -260 # discard arg -261 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP -262 # end -263 c3/return -264 -265 write_stderr: # s : (address array byte) -> <void> -266 # save registers -267 50/push-EAX -268 51/push-ECX -269 52/push-EDX -270 53/push-EBX -271 # write(2/stderr, (data) s+4, (size) *s) -272 # fd = 2 (stderr) -273 bb/copy . . . . . . . 2/imm32 # copy 2 to EBX -274 # x = s+4 -275 8b/copy 1/mod/*+disp8 4/rm32/SIB 4/base/ESP 4/index/none . 1/r32/ECX 0x14/disp8 . # copy *(ESP+20) to ECX -276 81 0/subop/add 3/mod/direct 1/rm32/ECX . . . . . 4/imm32 # add 4 to ECX -277 # size = *s -278 8b/copy 1/mod/*+disp8 4/rm32/SIB 4/base/ESP 4/index/none . 2/r32/EDX 0x14/disp8 . # copy *(ESP+20) to EDX -279 8b/copy 0/mod/indirect 2/rm32/EDX . . . 2/r32/EDX . . # copy *EDX to EDX -280 # call write() -281 b8/copy . . . . . . . 
4/imm32/write # copy 1 to EAX -282 cd/syscall 0x80/imm8 -283 # restore registers -284 5b/pop-EBX -285 5a/pop-EDX -286 59/pop-ECX -287 58/pop-EAX -288 # end -289 c3/return -290 -291 == data -292 Newline: -293 # size -294 01 00 00 00 -295 # data -296 0a/newline -297 -298 # for argv_equal tests -299 Null_argv: -300 00/null -301 Abc_argv: -302 41/A 62/b 63/c 00/null -303 -304 # vim:ft=subx:nowrap:so=0 +111 ## tests +112 +113 test_compare_null_kernel_string_with_empty_array: +114 # EAX = kernel_string_equal(Null_kernel_string, "") +115 # push args +116 68/push ""/imm32 +117 68/push Null_kernel_string/imm32 +118 # call +119 e8/call kernel_string_equal/disp32 +120 # discard args +121 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add to ESP +122 # call check_ints_equal(EAX, 1, msg) +123 # push args +124 68/push "F - test_compare_null_kernel_string_with_empty_array"/imm32 +125 68/push 1/imm32/true +126 50/push-EAX +127 # call +128 e8/call check_ints_equal/disp32 +129 # discard args +130 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add to ESP +131 c3/return +132 +133 test_compare_null_kernel_string_with_non_empty_array: +134 # EAX = kernel_string_equal(Null_kernel_string, "Abc") +135 # push args +136 68/push "Abc"/imm32 +137 68/push Null_kernel_string/imm32 +138 # call +139 e8/call kernel_string_equal/disp32 +140 # discard args +141 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add to ESP +142 # call check_ints_equal(EAX, 0, msg) +143 # push args +144 68/push "F - test_compare_null_kernel_string_with_non_empty_array"/imm32 +145 68/push 0/imm32/false +146 50/push-EAX +147 # call +148 e8/call check_ints_equal/disp32 +149 # discard args +150 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 
0xc/imm32 # add to ESP +151 c3/return +152 +153 test_compare_kernel_string_with_equal_array: +154 # EAX = kernel_string_equal(Abc_kernel_string, "Abc") +155 # push args +156 68/push "Abc"/imm32 +157 68/push Abc_kernel_string/imm32 +158 # call +159 e8/call kernel_string_equal/disp32 +160 # discard args +161 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add to ESP +162 # call check_ints_equal(EAX, 1, msg) +163 # push args +164 68/push "F - test_compare_kernel_string_with_equal_array"/imm32 +165 68/push 1/imm32/true +166 50/push-EAX +167 # call +168 e8/call check_ints_equal/disp32 +169 # discard args +170 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add to ESP +171 c3/return +172 +173 test_compare_kernel_string_with_inequal_array: +174 # EAX = kernel_string_equal(Abc_kernel_string, "Adc") +175 # push args +176 68/push "Adc"/imm32 +177 68/push Abc_kernel_string/imm32 +178 # call +179 e8/call kernel_string_equal/disp32 +180 # discard args +181 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add to ESP +182 # call check_ints_equal(EAX, 0, msg) +183 # push args +184 68/push "F - test_compare_kernel_string_with_equal_array"/imm32 +185 68/push 0/imm32/false +186 50/push-EAX +187 # call +188 e8/call check_ints_equal/disp32 +189 # discard args +190 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add to ESP +191 c3/return +192 +193 test_compare_kernel_string_with_empty_array: +194 # EAX = kernel_string_equal(Abc_kernel_string, "") +195 # push args +196 68/push ""/imm32 +197 68/push Abc_kernel_string/imm32 +198 # call +199 e8/call kernel_string_equal/disp32 +200 # discard args +201 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 
8/imm32 # add to ESP +202 # call check_ints_equal(EAX, 0) +203 # push args +204 68/push "F - test_compare_kernel_string_with_equal_array"/imm32 +205 68/push 0/imm32/false +206 50/push-EAX +207 # call +208 e8/call check_ints_equal/disp32 +209 # discard args +210 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add to ESP +211 c3/return +212 +213 test_compare_kernel_string_with_shorter_array: +214 # EAX = kernel_string_equal(Abc_kernel_string, "Ab") +215 # push args +216 68/push "Ab"/imm32 +217 68/push Abc_kernel_string/imm32 +218 # call +219 e8/call kernel_string_equal/disp32 +220 # discard args +221 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add to ESP +222 # call check_ints_equal(EAX, 0) +223 # push args +224 68/push "F - test_compare_kernel_string_with_shorter_array"/imm32 +225 68/push 0/imm32/false +226 50/push-EAX +227 # call +228 e8/call check_ints_equal/disp32 +229 # discard args +230 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add to ESP +231 c3/return +232 +233 test_compare_kernel_string_with_longer_array: +234 # EAX = kernel_string_equal(Abc_kernel_string, "Abcd") +235 # push args +236 68/push "Abcd"/imm32 +237 68/push Abc_kernel_string/imm32 +238 # call +239 e8/call kernel_string_equal/disp32 +240 # discard args +241 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add to ESP +242 # call check_ints_equal(EAX, 0) +243 # push args +244 68/push "F - test_compare_kernel_string_with_longer_array"/imm32 +245 68/push 0/imm32/false +246 50/push-EAX +247 # call +248 e8/call check_ints_equal/disp32 +249 # discard args +250 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0xc/imm32 # add to ESP +251 c3/return +252 +253 ## helpers +254 +255 # print msg to stderr if a != b, otherwise print "." +256 check_ints_equal: # (a : int, b : int, msg : (address array byte)) -> boolean +257 # prolog +258 55/push-EBP +259 89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . 
# copy ESP to EBP +260 # save registers +261 51/push-ECX +262 53/push-EBX +263 # load args into EAX, EBX and ECX +264 8b/copy 1/mod/*+disp8 4/rm32/sib 5/base/EBP 4/index/none . 0/r32/EAX 0x8/disp8 . # copy *(EBP+8) to EAX +265 8b/copy 1/mod/*+disp8 4/rm32/sib 5/base/EBP 4/index/none . 3/r32/EBX 0xc/disp8 . # copy *(EBP+12) to EBX +266 # if EAX == b/EBX +267 39/compare 3/mod/direct 0/rm32/EAX . . . 3/r32/EBX . . # compare EAX and EBX +268 75/jump-if-unequal $check_ints_equal:else/disp8 +269 # print('.') +270 # push args +271 68/push "."/imm32 +272 # call +273 e8/call write_stderr/disp32 +274 # discard arg +275 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP +276 # return +277 eb/jump $check_ints_equal:end/disp8 +278 # else: +279 $check_ints_equal:else: +280 # copy msg into ECX +281 8b/copy 1/mod/*+disp8 4/rm32/sib 5/base/EBP 4/index/none . 1/r32/ECX 0x10/disp8 . # copy *(EBP+16) to ECX +282 # print(ECX) +283 # push args +284 51/push-ECX +285 # call +286 e8/call write_stderr/disp32 +287 # discard arg +288 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP +289 # print newline +290 # push args +291 68/push Newline/imm32 +292 # call +293 e8/call write_stderr/disp32 +294 # discard arg +295 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP +296 $check_ints_equal:end: +297 # restore registers +298 5b/pop-to-EBX +299 59/pop-to-ECX +300 # end +301 89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP +302 5d/pop-to-EBP +303 c3/return +304 +305 write_stderr: # s : (address array byte) -> <void> +306 # prolog +307 55/push-EBP +308 89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP +309 # save registers +310 50/push-EAX +311 51/push-ECX +312 52/push-EDX +313 53/push-EBX +314 # write(2/stderr, (data) s+4, (size) *s) +315 # fd = 2 (stderr) +316 bb/copy . . . . . . . 2/imm32 # copy to EBX +317 # x = s+4 +318 8b/copy 1/mod/*+disp8 4/rm32/SIB 5/base/EBP 4/index/none . 1/r32/ECX 8/disp8 . 
# copy *(EBP+8) to ECX +319 81 0/subop/add 3/mod/direct 1/rm32/ECX . . . . . 4/imm32 # add to ECX +320 # size = *s +321 8b/copy 1/mod/*+disp8 4/rm32/SIB 5/base/EBP 4/index/none . 2/r32/EDX 8/disp8 . # copy *(EBP+8) to EDX +322 8b/copy 0/mod/indirect 2/rm32/EDX . . . 2/r32/EDX . . # copy *EDX to EDX +323 # call write() +324 b8/copy . . . . . . . 4/imm32/write # copy to EAX +325 cd/syscall 0x80/imm8 +326 # restore registers +327 5b/pop-to-EBX +328 5a/pop-to-EDX +329 59/pop-to-ECX +330 58/pop-to-EAX +331 # end +332 89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP +333 5d/pop-to-EBP +334 c3/return +335 +336 == data +337 Newline: +338 # size +339 01 00 00 00 +340 # data +341 0a/newline +342 +343 # for kernel_string_equal tests +344 Null_kernel_string: +345 00/null +346 Abc_kernel_string: +347 41/A 62/b 63/c 00/null +348 +349 # vim:ft=subx:nowrap:so=0 diff --git a/html/subx/examples/ex12.subx.html b/html/subx/examples/ex12.subx.html new file mode 100644 index 00000000..93feff8c --- /dev/null +++ b/html/subx/examples/ex12.subx.html @@ -0,0 +1,103 @@ + + + + +Mu - subx/examples/ex12.subx + + + + + + + + + + +
+ 1 ## example showing mmap syscall
+ 2 # Create a new segment using mmap, save the address, write to it.
+ 3 #
+ 4 # To run (from the subx directory):
+ 5 #   $ subx translate examples/ex12.subx -o examples/ex12
+ 6 #   $ subx run examples/ex12
+ 7 # You shouldn't get a segmentation fault.
+ 8 
+ 9 == code
+10 # instruction                     effective address                                                   operand     displacement    immediate
+11 # op          subop               mod             rm32          base        index         scale       r32
+12 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+13 
+14   # mmap(0x100)
+15   bb/copy                         .               .             .           .             .           .           .               mmap_new_segment/imm32  # copy to EBX
+16   b8/copy                         .               .             .           .             .           .           .               0x5a/imm32/mmap         # copy to EAX
+17   cd/syscall  0x80/imm8
+18 
+19   # store to *EAX
+20   c7/copy                         0/mod/direct    0/rm32/EAX    .           .             .           .           .               0x34/imm32              # copy to *EAX
+21 
+22   # exit(EAX)
+23   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                       # copy EAX to EBX
+24   b8/copy                         .               .             .           .             .           .           .               1/imm32/exit            # copy to EAX
+25   cd/syscall  0x80/imm8
+26 
+27 == data
+28 # various constants used here were found in the Linux sources (search for file mman-common.h)
+29 mmap_new_segment:  # type mmap_arg_struct
+30   # addr
+31   00 00 00 00  # null
+32   # len
+33   00 01 00 00  # 0x100
+34   # protection flags
+35   03 00 00 00  # PROT_READ | PROT_WRITE
+36   # sharing flags
+37   22 00 00 00  # MAP_PRIVATE | MAP_ANONYMOUS
+38   # fd
+39   ff ff ff ff  # -1 since MAP_ANONYMOUS is specified
+40   # offset
+41   00 00 00 00  # 0 since MAP_ANONYMOUS is specified
+42 
+43 # vim:ft=subx:nowrap:tw&
+
+ + + diff --git a/html/subx/examples/ex2.subx.html b/html/subx/examples/ex2.subx.html index 79014ace..81cce37f 100644 --- a/html/subx/examples/ex2.subx.html +++ b/html/subx/examples/ex2.subx.html @@ -16,6 +16,7 @@ a { color:#eeeeee; text-decoration: none; } a:hover { text-decoration: underline; } * { font-size: 12pt; font-size: 1em; } .LineNr { color: #444444; } +.Delimiter { color: #800080; } .Comment { color: #9090ff; } .Comment a { color:#0000ee; text-decoration:underline; } .SalientComment { color: #00ffff; } @@ -55,9 +56,9 @@ if ('onhashchange' in window) {
  1 ## add 1 and 1, and return the result in the exit code
  2 #
- 3 # To run:
- 4 #   $ subx translate ex2.subx ex2
- 5 #   $ subx run ex2
+ 3 # To run (from the subx directory):
+ 4 #   $ subx translate examples/ex2.subx -o examples/ex2
+ 5 #   $ subx run examples/ex2
  6 # Expected result:
  7 #   $ echo $?
  8 #   2
@@ -66,10 +67,10 @@ if ('onhashchange' in window) {
 11 # instruction                     effective address                                                   operand     displacement    immediate
 12 # op          subop               mod             rm32          base        index         scale       r32
 13 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-14   bb/copy                                                                                                                         1/imm32           # copy 1 to EBX
-15   81          0/subop/add         3/mod/direct    3/rm32/EBX                                                                      1/imm32           # add 1 to EBX
+14   bb/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EBX
+15   43/inc-EBX
 16   # exit(EBX)
-17   b8/copy                                                                                                                         1/imm32           # copy 1 to EAX
+17   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EAX
 18   cd/syscall  0x80/imm8
 19 
 20 # vim:ft=subx
diff --git a/html/subx/examples/ex3.subx.html b/html/subx/examples/ex3.subx.html
index ef71e45f..08832eed 100644
--- a/html/subx/examples/ex3.subx.html
+++ b/html/subx/examples/ex3.subx.html
@@ -55,9 +55,9 @@ if ('onhashchange' in window) {
 
  1 ## add the first 10 numbers, and return the result in the exit code
  2 #
- 3 # To run:
- 4 #   $ subx translate ex3.subx ex3
- 5 #   $ subx run ex3
+ 3 # To run (from the subx directory):
+ 4 #   $ subx translate examples/ex3.subx -o examples/ex3
+ 5 #   $ subx run examples/ex3
  6 # Expected result:
  7 #   $ echo $?
  8 #   55
@@ -67,24 +67,24 @@ if ('onhashchange' in window) {
 12 # op          subop               mod             rm32          base        index         scale       r32
 13 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
 14   # result: EBX = 0
-15   bb/copy                                                                                                                         0/imm32           # copy 0 to EBX
+15   bb/copy                                                                                                                         0/imm32           # copy to EBX
 16   # counter: ECX = 1
-17   b9/copy                                                                                                                         1/imm32           # copy 1 to ECX
+17   b9/copy                                                                                                                         1/imm32           # copy to ECX
 18 
 19 $loop:
 20   # while (counter <= 10)
-21   81          7/subop/compare     3/mod/direct    1/rm32/ECX                                                                      0xa/imm32         # compare ECX, 10/imm
+21   81          7/subop/compare     3/mod/direct    1/rm32/ECX                                                                      0xa/imm32         # compare ECX
 22   7f/jump-if-greater  $exit/disp8
 23   # result += counter
 24   01/add                          3/mod/direct    3/rm32/EBX                                          1/r32/ECX                                     # add ECX to EBX
 25   # ++counter
-26   81          0/subop/add         3/mod/direct    1/rm32/ECX                                                                      1/imm32           # add 1 to ECX
+26   41/inc-ECX
 27   # loop
 28   eb/jump  $loop/disp8
 29 
 30 $exit:
 31   # exit(EBX)
-32   b8/copy                                                                                                                         1/imm32           # copy 1 to EAX
+32   b8/copy                                                                                                                         1/imm32           # copy to EAX
 33   cd/syscall  0x80/imm8
 34 
 35 # vim:ft=subx:nowrap
diff --git a/html/subx/examples/ex4.subx.html b/html/subx/examples/ex4.subx.html
index aff62ea1..377949ae 100644
--- a/html/subx/examples/ex4.subx.html
+++ b/html/subx/examples/ex4.subx.html
@@ -55,9 +55,9 @@ if ('onhashchange' in window) {
 
  1 ## read a character from stdin, save it to a global, write it to stdout
  2 #
- 3 # To run:
- 4 #   $ subx translate ex4.subx ex4
- 5 #   $ subx run ex4
+ 3 # To run (from the subx directory):
+ 4 #   $ subx translate examples/ex4.subx -o examples/ex4
+ 5 #   $ subx run examples/ex4
  6 
  7 == code
  8 # instruction                     effective address                                                   operand     displacement    immediate
@@ -66,28 +66,28 @@ if ('onhashchange' in window) {
 11 
 12   # read(stdin, x, 1)
 13     # fd = 0 (stdin)
-14   bb/copy                                                                                                                         0/imm32           # copy 0 to EBX
+14   bb/copy                                                                                                                         0/imm32           # copy to EBX
 15     # initialize x (location to write result to)
 16   b9/copy                                                                                                                         x/imm32           # copy to ECX
 17     # size = 1 character
-18   ba/copy                                                                                                                         1/imm32           # copy 1 to EDX
+18   ba/copy                                                                                                                         1/imm32           # copy to EDX
 19     # read(fd, x, size)
-20   b8/copy                                                                                                                         3/imm32/read      # copy 3 to EAX
+20   b8/copy                                                                                                                         3/imm32/read      # copy to EAX
 21   cd/syscall  0x80/imm8
 22 
 23   # write(stdout, x, 1)
 24     # fd = 1 (stdout)
-25   bb/copy                                                                                                                         1/imm32           # copy 1 to EBX
+25   bb/copy                                                                                                                         1/imm32           # copy to EBX
 26     # initialize x (location to read from)
 27   b9/copy                                                                                                                         x/imm32           # copy to ECX
 28     # size = 1 character
-29   ba/copy                                                                                                                         1/imm32           # copy 1 to EDX
+29   ba/copy                                                                                                                         1/imm32           # copy to EDX
 30     # write(fd, x, size)
-31   b8/copy                                                                                                                         4/imm32/write     # copy 4 to EAX
+31   b8/copy                                                                                                                         4/imm32/write     # copy to EAX
 32   cd/syscall  0x80/imm8
 33 
 34   # exit(EBX)
-35   b8/copy                                                                                                                         1/imm32/exit      # copy 1 to EAX
+35   b8/copy                                                                                                                         1/imm32/exit      # copy to EAX
 36   cd/syscall  0x80/imm8
 37 
 38 == data
diff --git a/html/subx/examples/ex5.subx.html b/html/subx/examples/ex5.subx.html
index 7bd76e98..4392d9e2 100644
--- a/html/subx/examples/ex5.subx.html
+++ b/html/subx/examples/ex5.subx.html
@@ -56,9 +56,9 @@ if ('onhashchange' in window) {
 
  1 ## read a character from stdin, save it to a local on the stack, write it to stdout
  2 #
- 3 # To run:
- 4 #   $ subx translate ex5.subx ex5
- 5 #   $ subx run ex5
+ 3 # To run (from the subx directory):
+ 4 #   $ subx translate examples/ex5.subx -o examples/ex5
+ 5 #   $ subx run examples/ex5
  6 
  7 == code
  8 # instruction                     effective address                                                   operand     displacement    immediate
@@ -67,32 +67,32 @@ if ('onhashchange' in window) {
 11 
 12 # main:
 13   # allocate x on the stack
-14   81          5/subop/subtract    3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # subtract 4 bytes from ESP
+14   81          5/subop/subtract    3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # subtract from ESP
 15 
 16   # read(stdin, x, 1)
 17     # fd = 0 (stdin)
-18   bb/copy                         .               .             .           .             .           .           .               0/imm32           # copy 0 to EBX
+18   bb/copy                         .               .             .           .             .           .           .               0/imm32           # copy to EBX
 19     # initialize x (location to write result to)
 20   8d/copy-address                 1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              1/r32/ECX   4/disp8         .                 # copy ESP+4 to ECX
 21     # size = 1 character
-22   ba/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EDX
+22   ba/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EDX
 23     # read(fd, x, size)
-24   b8/copy                         .               .             .           .             .           .           .               3/imm32/read      # copy 3 to EAX
+24   b8/copy                         .               .             .           .             .           .           .               3/imm32/read      # copy to EAX
 25   cd/syscall  0x80/imm8
 26 
 27   # write(stdout, x, 1)
 28     # fd = 1 (stdout)
-29   bb/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EBX
+29   bb/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EBX
 30     # initialize x (location to read from)
 31   8d/copy-address                 1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              1/r32/ECX   4/disp8         .                 # copy ESP+4 to ECX
 32     # size = 1 character
-33   ba/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EDX
+33   ba/copy                         .               .             .           .             .           .           .               1/imm32           # copy to EDX
 34     # write(fd, x, size)
-35   b8/copy                         .               .             .           .             .           .           .               4/imm32/write     # copy 4 to EAX
+35   b8/copy                         .               .             .           .             .           .           .               4/imm32/write     # copy to EAX
 36   cd/syscall  0x80/imm8
 37 
 38   # exit(EBX)
-39   b8/copy                         .               .             .           .             .           .           .               1/imm32/exit      # copy 1 to EAX
+39   b8/copy                         .               .             .           .             .           .           .               1/imm32/exit      # copy to EAX
 40   cd/syscall  0x80/imm8
 41 
 42 # vim:ft=subx:nowrap
diff --git a/html/subx/examples/ex6.subx.html b/html/subx/examples/ex6.subx.html
index 77c6293e..8ad44018 100644
--- a/html/subx/examples/ex6.subx.html
+++ b/html/subx/examples/ex6.subx.html
@@ -55,9 +55,9 @@ if ('onhashchange' in window) {
 
  1 ## print out a (global variable) string to stdout
  2 #
- 3 # To run:
- 4 #   $ subx translate ex6.subx ex6
- 5 #   $ subx run ex6
+ 3 # To run (from the subx directory):
+ 4 #   $ subx translate examples/ex6.subx -o examples/ex6
+ 5 #   $ subx run examples/ex6
  6 #   Hello, world!
  7 
  8 == code
@@ -67,17 +67,17 @@ if ('onhashchange' in window) {
 12 
 13   # write(stdout, x, size)
 14     # fd = 1 (stdout)
-15   bb/copy                                                                                                                         1/imm32           # copy 1 to EBX
+15   bb/copy                                                                                                                         1/imm32           # copy to EBX
 16     # initialize x (location to write result to)
 17   b9/copy                                                                                                                         x/imm32           # copy to ECX
 18     # initialize size
 19   8b/copy                         0/mod/indirect  5/rm32/.disp32                                      2/r32/EDX   size/disp32                       # copy *size to EDX
 20     # write(fd, x, size)
-21   b8/copy                                                                                                                         4/imm32/write     # copy 4 to EAX
+21   b8/copy                                                                                                                         4/imm32/write     # copy to EAX
 22   cd/syscall  0x80/imm8
 23 
 24   # exit(EBX)
-25   b8/copy                                                                                                                         1/imm32/exit      # copy 1 to EAX
+25   b8/copy                                                                                                                         1/imm32/exit      # copy to EAX
 26   cd/syscall  0x80/imm8
 27 
 28 == data
@@ -85,7 +85,7 @@ if ('onhashchange' in window) {
 30   0e 00 00 00  # 14
 31 x:  # string to print
 32   48 65 6c 6c 6f 2c 20 77 6f 72 6c 64 21 0a       00
-33 # h  e  l  l  o  ,  ␣  w  o  r  l  d  !  newline  null
+33 # H  e  l  l  o  ,  ␣  w  o  r  l  d  !  newline  null
 34 
 35 # vim:ft=subx:nowrap
 
diff --git a/html/subx/examples/ex7.subx.html b/html/subx/examples/ex7.subx.html index 8a971671..d5ef4a99 100644 --- a/html/subx/examples/ex7.subx.html +++ b/html/subx/examples/ex7.subx.html @@ -59,9 +59,9 @@ if ('onhashchange' in window) { 3 # it for reading, read a character from it, close it, delete it, and return 4 # the character read. 5 # - 6 # To run: - 7 # $ subx translate ex8.subx ex8 - 8 # $ subx run ex8 + 6 # To run (from the subx directory): + 7 # $ subx translate examples/ex7.subx -o examples/ex7 + 8 # $ subx run examples/ex7 9 # Expected result: 10 # $ echo $? 11 # 97 @@ -74,14 +74,14 @@ if ('onhashchange' in window) { 18 # creat(filename) 19 bb/copy . . . . . . . filename/imm32 # copy to EBX 20 b9/copy . . . . . . . 0x180/imm32/fixed-perms # copy to ECX - 21 b8/copy . . . . . . . 8/imm32/creat # copy 8 to EAX + 21 b8/copy . . . . . . . 8/imm32/creat # copy to EAX 22 cd/syscall 0x80/imm8 23 24 # stream = open(filename, O_WRONLY, 0) # we can't use 'fd' because it looks like a hex byte 25 bb/copy . . . . . . . filename/imm32 # copy to EBX - 26 b9/copy . . . . . . . 1/imm32/wronly # copy 1 to ECX - 27 ba/copy . . . . . . . 0x180/imm32/fixed-perms # copy 0 to EDX - 28 b8/copy . . . . . . . 5/imm32/open # copy 5 to EAX + 26 b9/copy . . . . . . . 1/imm32/wronly # copy to ECX + 27 ba/copy . . . . . . . 0x180/imm32/fixed-perms # copy to EDX + 28 b8/copy . . . . . . . 5/imm32/open # copy to EAX 29 cd/syscall 0x80/imm8 30 # save stream 31 bb/copy . . . . . . . stream/imm32 # copy to EBX @@ -93,8 +93,8 @@ if ('onhashchange' in window) { 37 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX 38 # 39 b9/copy . . . . . . . a/imm32 # copy to ECX - 40 ba/copy . . . . . . . 1/imm32/size # copy 1 to EDX - 41 b8/copy . . . . . . . 4/imm32/write # copy 4 to EAX + 40 ba/copy . . . . . . . 1/imm32/size # copy to EDX + 41 b8/copy . . . . . . . 
4/imm32/write # copy to EAX 42 cd/syscall 0x80/imm8 43 44 # close(stream) @@ -102,14 +102,14 @@ if ('onhashchange' in window) { 46 bb/copy . . . . . . . stream/imm32 # copy to EBX 47 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX 48 # - 49 b8/copy . . . . . . . 6/imm32/close # copy 6 to EAX + 49 b8/copy . . . . . . . 6/imm32/close # copy to EAX 50 cd/syscall 0x80/imm8 51 52 # stream = open(filename, O_RDONLY, 0) 53 bb/copy . . . . . . . filename/imm32 # copy to EBX - 54 b9/copy . . . . . . . 0/imm32/rdonly # copy 0 to ECX + 54 b9/copy . . . . . . . 0/imm32/rdonly # copy to ECX 55 ba/copy . . . . . . . 0x180/imm32/fixed-perms # copy to EDX - 56 b8/copy . . . . . . . 5/imm32/open # copy 5 to EAX + 56 b8/copy . . . . . . . 5/imm32/open # copy to EAX 57 cd/syscall 0x80/imm8 58 # save stream 59 bb/copy . . . . . . . stream/imm32 # copy to EBX @@ -121,8 +121,8 @@ if ('onhashchange' in window) { 65 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX 66 # 67 b9/copy . . . . . . . b/imm32 # copy to ECX - 68 ba/copy . . . . . . . 1/imm32/size # copy 1 to EDX - 69 b8/copy . . . . . . . 3/imm32/read # copy 3 to EAX + 68 ba/copy . . . . . . . 1/imm32/size # copy to EDX + 69 b8/copy . . . . . . . 3/imm32/read # copy to EAX 70 cd/syscall 0x80/imm8 71 72 # close(stream) @@ -130,12 +130,12 @@ if ('onhashchange' in window) { 74 bb/copy . . . . . . . stream/imm32 # copy to EBX 75 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX 76 # - 77 b8/copy . . . . . . . 6/imm32/close # copy 6 to EAX + 77 b8/copy . . . . . . . 6/imm32/close # copy to EAX 78 cd/syscall 0x80/imm8 79 80 # unlink(filename) 81 bb/copy . . . . . . . filename/imm32 # copy to EBX - 82 b8/copy . . . . . . . 0xa/imm32/unlink # copy 10 to EAX + 82 b8/copy . . . . . . . 0xa/imm32/unlink # copy to EAX 83 cd/syscall 0x80/imm8 84 85 # exit(b) @@ -143,7 +143,7 @@ if ('onhashchange' in window) { 87 bb/copy . . . . . . . 
b/imm32 # copy to EBX 88 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX 89 # - 90 b8/copy . . . . . . . 1/imm32/exit # copy 1 to EAX + 90 b8/copy . . . . . . . 1/imm32/exit # copy to EAX 91 cd/syscall 0x80/imm8 92 93 == data diff --git a/html/subx/examples/ex8.subx.html b/html/subx/examples/ex8.subx.html index 65f5e09c..abb09b84 100644 --- a/html/subx/examples/ex8.subx.html +++ b/html/subx/examples/ex8.subx.html @@ -56,9 +56,9 @@ if ('onhashchange' in window) {
  1 ## Example reading commandline arguments: compute length of first arg.
  2 #
- 3 # To run:
- 4 #   $ subx translate ex8.subx ex8
- 5 #   $ subx run ex8 abc de fghi
+ 3 # To run (from the subx directory):
+ 4 #   $ subx translate examples/ex8.subx -o examples/ex8
+ 5 #   $ subx run examples/ex8 abc de fghi
  6 # Expected result:
  7 #   $ echo $?
  8 #   3  # length of 'abc'
@@ -74,36 +74,36 @@ if ('onhashchange' in window) {
 18 # instruction                     effective address                                                   operand     displacement    immediate
 19 # op          subop               mod             rm32          base        index         scale       r32
 20 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-21   # var s = argv[1] (EBX)
-22   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              3/r32/EBX   8/disp8         .                       # copy *(ESP+8) to EBX
-23   # call ascii_length(EBX)
+21   # prolog
+22   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+23   # call ascii_length(argv[1])
 24     # push args
-25   53/push-EBX
+25   ff          6/subop/push        1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           .           0x8/disp8       .                 # push *(EBP+8)
 26     # call
 27   e8/call  ascii_length/disp32
 28     # discard args
-29   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32                 # add 4 to ESP
+29   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
 30 
 31   # exit(EAX)
-32   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                       # copy EAX to EBX
-33   b8/copy                         .               .             .           .             .           .           .               1/imm32/exit            # copy 1 to EAX
+32   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                 # copy EAX to EBX
+33   b8/copy                         .               .             .           .             .           .           .               1/imm32/exit      # copy to EAX
 34   cd/syscall  0x80/imm8
 35 
 36 ascii_length:  # (s)
 37   # initialize s (EDX)
-38   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              2/r32/EDX   4/disp8                                 # copy *(ESP+4) to EDX
+38   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              2/r32/EDX   4/disp8                           # copy *(ESP+4) to EDX
 39   # var result = 0 (EAX)
-40   b8/copy                         .               .             .           .             .           .           .               0/imm32                 # copy 0 to EAX
+40   b8/copy                         .               .             .           .             .           .           .               0/imm32           # copy to EAX
 41 $ascii_length_loop:
 42   # var c = *s (ECX)
-43   8a/copy                         0/mod/*         2/rm32/EDX    .           .             .           1/r32/ECX   .               .                       # copy byte at *EDX to lower byte of ECX
+43   8a/copy                         0/mod/*         2/rm32/EDX    .           .             .           1/r32/ECX   .               .                 # copy byte at *EDX to lower byte of ECX
 44   # if c == '\0' break
-45   81          7/subop/compare     3/mod/direct    1/rm32/ECX    .           .             .           .           .               0/imm32                 # compare ECX with 0
+45   81          7/subop/compare     3/mod/direct    1/rm32/ECX    .           .             .           .           .               0/imm32           # compare ECX
 46   74/jump-if-equal  $ascii_length_ret/disp8
 47   # ++s
-48   81          0/subop/add         3/mod/direct    2/rm32/EDX    .           .             .           .           .               1/imm32                 # add 1 to EDX
+48   81          0/subop/add         3/mod/direct    2/rm32/EDX    .           .             .           .           .               1/imm32           # add to EDX
 49   # ++result
-50   81          0/subop/add         3/mod/direct    0/rm32/EAX    .           .             .           .           .               1/imm32                 # add 1 to EAX
+50   40/inc-EAX
 51   # loop
 52   eb/jump  $ascii_length_loop/disp8
 53 $ascii_length_ret:
diff --git a/html/subx/examples/ex9.subx.html b/html/subx/examples/ex9.subx.html
index 78d8323e..882f6b51 100644
--- a/html/subx/examples/ex9.subx.html
+++ b/html/subx/examples/ex9.subx.html
@@ -58,9 +58,9 @@ if ('onhashchange' in window) {
  2 # Show difference between ascii codes of first letter of first arg and first
  3 # letter of second arg.
  4 #
- 5 # To run:
- 6 #   $ subx translate ex9.subx ex9
- 7 #   $ subx run ex9 z x
+ 5 # To run (from the subx directory):
+ 6 #   $ subx translate examples/ex9.subx -o examples/ex9
+ 7 #   $ subx run examples/ex9 z x
  8 # Expected result:
  9 #   $ echo $?
 10 #   2
@@ -76,33 +76,32 @@ if ('onhashchange' in window) {
 20 # instruction                     effective address                                                   operand     displacement    immediate
 21 # op          subop               mod             rm32          base        index         scale       r32
 22 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-23   # s1 = argv[1] (EAX)
-24   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              0/r32/EAX   8/disp8         .                 # copy *(ESP+8) to EAX
-25   # s2 = argv[2] (EBX)
-26   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              3/r32/EBX   0xc/disp8       .                 # copy *(ESP+12) to EBX
-27   # call string_equal(s1, s2)
-28     # push args
-29   50/push-EAX
-30   53/push-EBX
-31     # call
-32   e8/call  ascii_difference/disp32
-33     # discard args
-34   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add 8 to ESP
-35   # exit(EAX)
-36   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                 # copy EAX to EBX
-37   b8/copy                         .               .             .           .             .           .           .               1/imm32/exit      # copy 1 to EAX
-38   cd/syscall  0x80/imm8
-39 
-40 ascii_difference:  # (s1, s2) : null-terminated ascii strings
-41   # a = first letter of s1 (ECX)
-42   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              0/r32/EAX   8/disp8         .                 # copy *(ESP+8) to EAX
-43   8b/copy                         0/mod/indirect  0/rm32/EAX    .           .             .           0/r32/EAX   .               .                 # copy *EAX to EAX
-44   # b = first letter of s2 (EDX)
-45   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              1/r32/ECX   4/disp8                           # copy *(ESP+4) to ECX
-46   8b/copy                         0/mod/indirect  1/rm32/ECX    .           .             .           1/r32/ECX   .               .                 # copy *ECX to ECX
-47   # a-b
-48   29/subtract                     3/mod/direct    0/rm32/EAX    .           .             .           1/r32/ECX   .               .                 # subtract ECX from EAX
-49   c3/return
+23   # prolog
+24   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+25   # call ascii_difference(argv[1], argv[2])
+26     # push argv[2]
+27   ff          6/subop/push        1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           .           0xc/disp8       .                 # push *(EBP+12)
+28     # push argv[1]
+29   ff          6/subop/push        1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           .           0x8/disp8       .                 # push *(EBP+8)
+30     # call
+31   e8/call  ascii_difference/disp32
+32     # discard args
+33   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+34   # exit(EAX)
+35   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                 # copy EAX to EBX
+36   b8/copy                         .               .             .           .             .           .           .               1/imm32/exit      # copy to EAX
+37   cd/syscall  0x80/imm8
+38 
+39 ascii_difference:  # (s1, s2) : null-terminated ascii strings
+40   # a = first letter of s1 (EAX)
+41   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              0/r32/EAX   4/disp8         .                 # copy *(ESP+4) to EAX
+42   8b/copy                         0/mod/indirect  0/rm32/EAX    .           .             .           0/r32/EAX   .               .                 # copy *EAX to EAX
+43   # b = first letter of s2 (ECX)
+44   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              1/r32/ECX   8/disp8                           # copy *(ESP+8) to ECX
+45   8b/copy                         0/mod/indirect  1/rm32/ECX    .           .             .           1/r32/ECX   .               .                 # copy *ECX to ECX
+46   # a-b
+47   29/subtract                     3/mod/direct    0/rm32/EAX    .           .             .           1/r32/ECX   .               .                 # subtract ECX from EAX
+48   c3/return
 
-- cgit 1.4.1-2-gfad0