Mu - subx/010---vm.cc

From 7328af20a1921d9258a60803ee5367da97a6082e Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Mon, 13 Aug 2018 21:25:22 -0700 Subject: 4521 --- html/040brace.cc.html | 12 +- html/subx/001help.cc.html | 20 +- html/subx/003trace.cc.html | 679 +++++++++++++------------ html/subx/003trace.test.cc.html | 164 +++--- html/subx/010---vm.cc.html | 328 ++++++++++++ html/subx/010vm.cc.html | 307 ------------ html/subx/011run.cc.html | 256 ++++++---- html/subx/012elf.cc.html | 56 +-- html/subx/013direct_addressing.cc.html | 202 ++++---- html/subx/014indirect_addressing.cc.html | 152 +++--- html/subx/015immediate_addressing.cc.html | 212 ++++---- html/subx/016index_addressing.cc.html | 26 +- html/subx/017jump_disp8.cc.html | 82 +-- html/subx/018jump_disp16.cc.html | 72 +-- html/subx/019functions.cc.html | 38 +- html/subx/020syscalls.cc.html | 143 ++++-- html/subx/028translate.cc.html | 223 +++++++++ html/subx/029transforms.cc.html | 124 +++++ html/subx/029translate.cc.html | 288 ----------- html/subx/030---operands.cc.html | 536 ++++++++++++++++++++ html/subx/030check_operands.cc.html | 801 ------------------------------ html/subx/031check_operand_bounds.cc.html | 141 ------ html/subx/031check_operands.cc.html | 588 ++++++++++++++++++++++ html/subx/032check_operand_bounds.cc.html | 121 +++++ html/subx/032pack_operands.cc.html | 268 ---------- html/subx/033non_code_segment.cc.html | 39 +- html/subx/034discourage_raw_hex.cc.html | 102 ++++ html/subx/034labels.cc.html | 226 --------- html/subx/035label_types.cc.html | 67 --- html/subx/035labels.cc.html | 304 ++++++++++++ html/subx/036recommend_labels.cc.html | 145 ++++++ html/subx/037label_types.cc.html | 110 ++++ html/subx/038check_local_jumps.cc.html | 125 +++++ html/subx/ex3.subx.html | 8 +- html/subx/ex4.subx.html | 2 +- html/subx/ex6.subx.html | 6 +- html/subx/ex7.subx.html | 4 +- html/subx/ex8.subx.html | 159 ++++++ html/subx/x.subx.html | 124 ----- 39 files changed, 4011 insertions(+), 3249 deletions(-) create mode 
100644 html/subx/010---vm.cc.html delete mode 100644 html/subx/010vm.cc.html create mode 100644 html/subx/028translate.cc.html create mode 100644 html/subx/029transforms.cc.html delete mode 100644 html/subx/029translate.cc.html create mode 100644 html/subx/030---operands.cc.html delete mode 100644 html/subx/030check_operands.cc.html delete mode 100644 html/subx/031check_operand_bounds.cc.html create mode 100644 html/subx/031check_operands.cc.html create mode 100644 html/subx/032check_operand_bounds.cc.html delete mode 100644 html/subx/032pack_operands.cc.html create mode 100644 html/subx/034discourage_raw_hex.cc.html delete mode 100644 html/subx/034labels.cc.html delete mode 100644 html/subx/035label_types.cc.html create mode 100644 html/subx/035labels.cc.html create mode 100644 html/subx/036recommend_labels.cc.html create mode 100644 html/subx/037label_types.cc.html create mode 100644 html/subx/038check_local_jumps.cc.html create mode 100644 html/subx/ex8.subx.html delete mode 100644 html/subx/x.subx.html diff --git a/html/040brace.cc.html b/html/040brace.cc.html index 4f47aeb9..4d4d588b 100644 --- a/html/040brace.cc.html +++ b/html/040brace.cc.html @@ -98,19 +98,19 @@ if ('onhashchange' in window) { 36 37 :(code) 38 void transform_braces(const recipe_ordinal r) { - 39 const int OPEN = 0, CLOSE = 1; + 39 const bool OPEN = false, CLOSE = true; 40 // use signed integer for step index because we'll be doing arithmetic on it - 41 list<pair<int/*OPEN/CLOSE*/, /*step*/int> > braces; + 41 list<pair<bool/*OPEN/CLOSE*/, /*step*/int> > braces; 42 trace(9991, "transform") << "--- transform braces for recipe " << get(Recipe, r).name << end(); 43 for (int index = 0; index < SIZE(get(Recipe, r).steps); ++index) { 44 const instruction& inst = get(Recipe, r).steps.at(index); 45 if (inst.label == "{") { 46 trace(9993, "transform") << maybe(get(Recipe, r).name) << "push (open, " << index << ")" << end(); - 47 braces.push_back(pair<int,int>(OPEN, index)); + 47 
braces.push_back(pair<bool,int>(OPEN, index)); 48 } 49 if (inst.label == "}") { 50 trace(9993, "transform") << "push (close, " << index << ")" << end(); - 51 braces.push_back(pair<int,int>(CLOSE, index)); + 51 braces.push_back(pair<bool,int>(CLOSE, index)); 52 } 53 } 54 stack</*step*/int> open_braces; @@ -194,9 +194,9 @@ if ('onhashchange' in window) { 132 133 // returns a signed integer not just so that we can return -1 but also to 134 // enable future signed arithmetic -135 int matching_brace(int index, const list<pair<int, int> >& braces, recipe_ordinal r) { +135 int matching_brace(int index, const list<pair<bool, int> >& braces, recipe_ordinal r) { 136 int stacksize = 0; -137 for (list<pair<int, int> >::const_iterator p = braces.begin(); p != braces.end(); ++p) { +137 for (list<pair<bool, int> >::const_iterator p = braces.begin(); p != braces.end(); ++p) { 138 if (p->second < index) continue; 139 stacksize += (p->first ? 1 : -1); 140 if (stacksize == 0) return p->second; diff --git a/html/subx/001help.cc.html b/html/subx/001help.cc.html index e76d8212..d5582776 100644 --- a/html/subx/001help.cc.html +++ b/html/subx/001help.cc.html @@ -137,7 +137,7 @@ if ('onhashchange' in window) { 76 " subx translate <input 'source' file> <output ELF binary>\n" 77 "- Run a SubX binary using SubX itself (for better error messages):\n" 78 " subx run <ELF binary>\n" - 79 "Add '--trace' to any of these commands to also emit a trace, for debugging purposes.\n" + 79 "Add '--trace' to any of these commands to also emit a trace, for debugging purposes.\n" 80 "However, options starting with '--' must always come before any other arguments.\n" 81 "\n" 82 "To start learning how to write SubX programs, run:\n" @@ -153,9 +153,9 @@ if ('onhashchange' in window) { 92 93 bool starts_with(const string& s, const string& pat) { 94 string::const_iterator a=s.begin(), b=pat.begin(); - 95 for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) + 95 for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) 
96 if (*a != *b) return false; - 97 return b == pat.end(); + 97 return b == pat.end(); 98 } 99 100 //: I'll throw some style conventions here for want of a better place for them. @@ -207,7 +207,7 @@ if ('onhashchange' in window) { 146 //: yadda-yadda. Instead use this macro below to perform an unsafe cast to 147 //: signed. We'll just give up immediately if a container's ever too large. 148 //: Basically, Mu is not concerned about this being a little slower than it -149 //: could be. (https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7) +149 //: could be. (https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7) 150 //: 151 //: Addendum to corollary: We're going to uniformly use int everywhere, to 152 //: indicate that we're oblivious to number size, and since Clang on 32-bit @@ -219,7 +219,7 @@ if ('onhashchange' in window) { 158 159 //: 5. Integer overflow is guarded against at runtime using the -ftrapv flag 160 //: to the compiler, supported by Clang (GCC version only works sometimes: -161 //: http://stackoverflow.com/questions/20851061/how-to-make-gcc-ftrapv-work). +161 //: http://stackoverflow.com/questions/20851061/how-to-make-gcc-ftrapv-work). 162 :(before "atexit(reset)") 163 initialize_signal_handlers(); // not always necessary, but doesn't hurt 164 //? 
cerr << INT_MAX+1 << '\n'; // test overflow @@ -287,17 +287,17 @@ if ('onhashchange' in window) { 226 // from http://stackoverflow.com/questions/152643/idiomatic-c-for-reading-from-a-const-map 227 template<typename T> typename T::mapped_type& get(T& map, typename T::key_type const& key) { 228 typename T::iterator iter(map.find(key)); -229 if (iter == map.end()) { +229 if (iter == map.end()) { 230 cerr << "get couldn't find key '" << key << "'\n"; -231 assert(iter != map.end()); +231 assert(iter != map.end()); 232 } 233 return iter->second; 234 } 235 template<typename T> typename T::mapped_type const& get(const T& map, typename T::key_type const& key) { 236 typename T::const_iterator iter(map.find(key)); -237 if (iter == map.end()) { +237 if (iter == map.end()) { 238 cerr << "get couldn't find key '" << key << "'\n"; -239 assert(iter != map.end()); +239 assert(iter != map.end()); 240 } 241 return iter->second; 242 } @@ -306,7 +306,7 @@ if ('onhashchange' in window) { 245 return map[key]; 246 } 247 template<typename T> bool contains_key(T& map, typename T::key_type const& key) { -248 return map.find(key) != map.end(); +248 return map.find(key) != map.end(); 249 } 250 template<typename T> typename T::mapped_type& get_or_insert(T& map, typename T::key_type const& key) { 251 return map[key]; diff --git a/html/subx/003trace.cc.html b/html/subx/003trace.cc.html index 0a057b43..38d75d22 100644 --- a/html/subx/003trace.cc.html +++ b/html/subx/003trace.cc.html @@ -104,7 +104,7 @@ if ('onhashchange' in window) { 45 //: 46 //: Between layers and domain-driven testing, programming starts to look like a 47 //: fundamentally different activity. Instead of a) superficial, b) local rules - 48 //: on c) code [like say http://blog.bbv.ch/2013/06/05/clean-code-cheat-sheet], + 48 //: on c) code [like say http://blog.bbv.ch/2013/06/05/clean-code-cheat-sheet], 49 //: we allow programmers to engage with the a) deep, b) global structure of the 50 //: c) domain. 
If you can systematically track discontinuities in the domain, 51 //: you don't care if the code used gotos as long as it passed the tests. If @@ -124,7 +124,7 @@ if ('onhashchange' in window) { 65 //: 66 //: "Programming properly should be regarded as an activity by which 67 //: programmers form a mental model, rather than as production of a program." - 68 //: -- Peter Naur (http://alistair.cockburn.us/ASD+book+extract%3A+%22Naur,+Ehn,+Musashi%22) + 68 //: -- Peter Naur (http://alistair.cockburn.us/ASD+book+extract%3A+%22Naur,+Ehn,+Musashi%22) 69 70 :(before "End Types") 71 struct trace_line { @@ -135,349 +135,346 @@ if ('onhashchange' in window) { 76 trace_line(int d, string l, string c) :depth(d), label(l), contents(c) {} 77 }; 78 - 79 :(before "End Globals") - 80 bool Hide_errors = false; // if set, don't print even error trace lines to screen - 81 bool Dump_trace = false; // if set, print trace lines to screen - 82 string Dump_label = ""; // if set, print trace lines matching a single label to screen - 83 :(before "End Reset") - 84 Hide_errors = false; - 85 Dump_trace = false; - 86 Dump_label = ""; - 87 - 88 //: Support for tracing an entire run. - 89 //: Traces can have a lot of overhead, so only turn them on when asked. - 90 :(before "End Commandline Options(*arg)") - 91 else if (is_equal(*arg, "--trace")) { - 92 Save_trace = true; - 93 } - 94 :(before "End Commandline Parsing") - 95 if (Save_trace) { - 96 cerr << "initializing trace\n"; - 97 Trace_stream = new trace_stream; - 98 } - 99 :(code) -100 void cleanup_main() { -101 if (!Trace_stream) return; -102 if (Save_trace) -103 Trace_stream->save(); -104 delete Trace_stream; -105 Trace_stream = NULL; -106 } -107 :(before "End One-time Setup") -108 atexit(cleanup_main); -109 -110 :(before "End Types") -111 // Pre-define some global constants that trace_stream needs to know about. -112 // Since they're in the Types section, they'll be included in any cleaved -113 // compilation units. So no extern linkage. 
-114 const int Max_depth = 9999; -115 const int Error_depth = 0; // definitely always print errors -116 const int Warn_depth = 1; -117 -118 struct trace_stream { -119 vector<trace_line> past_lines; -120 // accumulator for current line -121 ostringstream* curr_stream; -122 string curr_label; -123 int curr_depth; -124 int callstack_depth; -125 int collect_depth; -126 ofstream null_stream; // never opens a file, so writes silently fail -127 trace_stream() :curr_stream(NULL), curr_depth(Max_depth), callstack_depth(0), collect_depth(Max_depth) {} -128 ~trace_stream() { if (curr_stream) delete curr_stream; } -129 -130 ostream& stream(string label) { -131 return stream(Max_depth, label); -132 } -133 -134 ostream& stream(int depth, string label) { -135 if (depth > collect_depth) return null_stream; -136 curr_stream = new ostringstream; -137 curr_label = label; -138 curr_depth = depth; -139 return *curr_stream; -140 } -141 -142 void save() { -143 cerr << "saving trace to 'last_run'\n"; -144 ofstream fout("last_run"); -145 fout << readable_contents(""); -146 fout.close(); -147 } -148 -149 // be sure to call this before messing with curr_stream or curr_label -150 void newline(); -151 // useful for debugging -152 string readable_contents(string label); // empty label = show everything -153 }; -154 -155 :(code) -156 void trace_stream::newline() { -157 if (!curr_stream) return; -158 string curr_contents = curr_stream->str(); -159 if (!curr_contents.empty()) { -160 past_lines.push_back(trace_line(curr_depth, trim(curr_label), curr_contents)); // preserve indent in contents -161 if ((!Hide_errors && curr_label == "error") -162 || Dump_trace -163 || (!Dump_label.empty() && curr_label == Dump_label)) -164 cerr << curr_label << ": " << curr_contents << '\n'; -165 } -166 delete curr_stream; -167 curr_stream = NULL; -168 curr_label.clear(); -169 curr_depth = Max_depth; -170 } -171 -172 string trace_stream::readable_contents(string label) { -173 ostringstream output; -174 label = 
trim(label); -175 for (vector<trace_line>::iterator p = past_lines.begin(); p != past_lines.end(); ++p) -176 if (label.empty() || label == p->label) { -177 output << std::setw(4) << p->depth << ' ' << p->label << ": " << p->contents << '\n'; -178 } -179 return output.str(); -180 } -181 -182 :(before "End Globals") -183 trace_stream* Trace_stream = NULL; -184 int Trace_errors = 0; // used only when Trace_stream is NULL -185 -186 :(before "End Includes") -187 #define CLEAR_TRACE delete Trace_stream, Trace_stream = new trace_stream; -188 -189 // Top-level helper. IMPORTANT: can't nest -190 #define trace(...) !Trace_stream ? cerr /*print nothing*/ : Trace_stream->stream(__VA_ARGS__) + 79 //: Support for tracing an entire run. + 80 //: Traces can have a lot of overhead, so only turn them on when asked. + 81 :(before "End Commandline Options(*arg)") + 82 else if (is_equal(*arg, "--trace")) { + 83 Save_trace = true; + 84 } + 85 :(before "End Commandline Parsing") + 86 if (Save_trace) { + 87 cerr << "initializing trace\n"; + 88 Trace_stream = new trace_stream; + 89 } + 90 :(code) + 91 void cleanup_main() { + 92 if (!Trace_stream) return; + 93 if (Save_trace) + 94 Trace_stream->save(); + 95 delete Trace_stream; + 96 Trace_stream = NULL; + 97 } + 98 :(before "End One-time Setup") + 99 atexit(cleanup_main); +100 +101 :(before "End Types") +102 // Pre-define some global constants that trace_stream needs to know about. +103 // Since they're in the Types section, they'll be included in any cleaved +104 // compilation units. So no extern linkage. 
+105 const int Max_depth = 9999; +106 const int Error_depth = 0; // definitely always print errors +107 const int Warn_depth = 1; +108 +109 struct trace_stream { +110 vector<trace_line> past_lines; +111 // accumulator for current line +112 ostringstream* curr_stream; +113 string curr_label; +114 int curr_depth; +115 int collect_depth; +116 ofstream null_stream; // never opens a file, so writes silently fail +117 trace_stream() :curr_stream(NULL), curr_depth(Max_depth), collect_depth(Max_depth) {} +118 ~trace_stream() { if (curr_stream) delete curr_stream; } +119 +120 ostream& stream(string label) { +121 return stream(Max_depth, label); +122 } +123 +124 ostream& stream(int depth, string label) { +125 if (depth > collect_depth) return null_stream; +126 curr_stream = new ostringstream; +127 curr_label = label; +128 curr_depth = depth; +129 (*curr_stream) << std::hex; +130 return *curr_stream; +131 } +132 +133 void save() { +134 cerr << "saving trace to 'last_run'\n"; +135 ofstream fout("last_run"); +136 fout << readable_contents(""); +137 fout.close(); +138 } +139 +140 // be sure to call this before messing with curr_stream or curr_label +141 void newline(); +142 // useful for debugging +143 string readable_contents(string label); // empty label = show everything +144 }; +145 +146 :(code) +147 void trace_stream::newline() { +148 if (!curr_stream) return; +149 string curr_contents = curr_stream->str(); +150 if (!curr_contents.empty()) { +151 past_lines.push_back(trace_line(curr_depth, trim(curr_label), curr_contents)); // preserve indent in contents +152 if ((!Hide_errors && curr_depth == Error_depth) +153 || (!Hide_warnings && !Hide_errors && curr_depth == Warn_depth) +154 || Dump_trace +155 || (!Dump_label.empty() && curr_label == Dump_label)) +156 cerr << curr_label << ": " << curr_contents << '\n'; +157 } +158 delete curr_stream; +159 curr_stream = NULL; +160 curr_label.clear(); +161 curr_depth = Max_depth; +162 } +163 +164 string 
trace_stream::readable_contents(string label) { +165 ostringstream output; +166 label = trim(label); +167 for (vector<trace_line>::iterator p = past_lines.begin(); p != past_lines.end(); ++p) +168 if (label.empty() || label == p->label) { +169 output << std::setw(4) << p->depth << ' ' << p->label << ": " << p->contents << '\n'; +170 } +171 return output.str(); +172 } +173 +174 :(before "End Globals") +175 trace_stream* Trace_stream = NULL; +176 int Trace_errors = 0; // used only when Trace_stream is NULL +177 +178 :(before "End Globals") +179 bool Hide_errors = false; // if set, don't print even error trace lines to screen +180 bool Hide_warnings = false; // if set, don't print warnings to screen +181 bool Dump_trace = false; // if set, print trace lines to screen +182 string Dump_label = ""; // if set, print trace lines matching a single label to screen +183 :(before "End Reset") +184 Hide_errors = false; +185 Hide_warnings = false; +186 Dump_trace = false; +187 Dump_label = ""; +188 //: Never dump warnings in scenarios +189 :(before "End Test Setup") +190 Hide_warnings = true; 191 -192 // Just for debugging; 'git log' should never show any calls to 'dbg'. -193 #define dbg trace(0, "a") -194 #define DUMP(label) if (Trace_stream) cerr << Trace_stream->readable_contents(label); -195 -196 // Errors and warnings are special layers. -197 #define raise (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Error_depth, "error")) -198 #define warn (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Warn_depth, "warn")) -199 // If we aren't yet sure how to deal with some corner case, use assert_for_now -200 // to indicate that it isn't an inviolable invariant. -201 #define assert_for_now assert -202 -203 // Inside tests, fail any tests that displayed (unexpected) errors. -204 // Expected errors in tests should always be hidden and silently checked for. 
-205 :(before "End Test Teardown") -206 if (Passed && !Hide_errors && trace_contains_errors()) { -207 Passed = false; -208 } -209 :(code) -210 bool trace_contains_errors() { -211 return Trace_errors > 0 || trace_count("error") > 0; -212 } -213 -214 :(before "End Types") -215 struct end {}; +192 :(before "End Includes") +193 #define CLEAR_TRACE delete Trace_stream, Trace_stream = new trace_stream; +194 +195 // Top-level helper. IMPORTANT: can't nest +196 #define trace(...) !Trace_stream ? cerr /*print nothing*/ : Trace_stream->stream(__VA_ARGS__) +197 +198 // Just for debugging; 'git log' should never show any calls to 'dbg'. +199 #define dbg trace(0, "a") +200 #define DUMP(label) if (Trace_stream) cerr << Trace_stream->readable_contents(label); +201 +202 // Errors and warnings are special layers. +203 #define raise (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Error_depth, "error")) +204 #define warn (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Warn_depth, "warn")) +205 // If we aren't yet sure how to deal with some corner case, use assert_for_now +206 // to indicate that it isn't an inviolable invariant. +207 #define assert_for_now assert +208 #define raise_for_now raise +209 +210 // Inside tests, fail any tests that displayed (unexpected) errors. +211 // Expected errors in tests should always be hidden and silently checked for. +212 :(before "End Test Teardown") +213 if (Passed && !Hide_errors && trace_contains_errors()) { +214 Passed = false; +215 } 216 :(code) -217 ostream& operator<<(ostream& os, end /*unused*/) { -218 if (Trace_stream) Trace_stream->newline(); -219 return os; -220 } -221 -222 :(before "End Globals") -223 bool Save_trace = false; // if set, write out trace to disk -224 -225 // Trace_stream is a resource, lease_tracer uses RAII to manage it. 
-226 :(before "End Types") -227 struct lease_tracer { -228 lease_tracer(); -229 ~lease_tracer(); -230 }; -231 :(code) -232 lease_tracer::lease_tracer() { Trace_stream = new trace_stream; } -233 lease_tracer::~lease_tracer() { -234 if (Save_trace) Trace_stream->save(); -235 delete Trace_stream, Trace_stream = NULL; -236 } -237 :(before "End Includes") -238 #define START_TRACING_UNTIL_END_OF_SCOPE lease_tracer leased_tracer; -239 :(before "End Test Setup") -240 START_TRACING_UNTIL_END_OF_SCOPE -241 -242 :(before "End Includes") -243 #define CHECK_TRACE_CONTENTS(...) check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__) -244 -245 #define CHECK_TRACE_CONTAINS_ERRORS() CHECK(trace_contains_errors()) -246 #define CHECK_TRACE_DOESNT_CONTAIN_ERRORS() \ -247 if (Passed && trace_contains_errors()) { \ -248 cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected errors\n"; \ -249 DUMP("error"); \ -250 Passed = false; \ -251 return; \ -252 } -253 -254 #define CHECK_TRACE_COUNT(label, count) \ -255 if (Passed && trace_count(label) != (count)) { \ -256 cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): trace_count of " << label << " should be " << count << '\n'; \ -257 cerr << " got " << trace_count(label) << '\n'; /* multiple eval */ \ -258 DUMP(label); \ -259 Passed = false; \ -260 return; /* Currently we stop at the very first failure. */ \ -261 } -262 -263 #define CHECK_TRACE_DOESNT_CONTAIN(...) 
CHECK(trace_doesnt_contain(__VA_ARGS__)) -264 -265 :(code) -266 bool check_trace_contents(string FUNCTION, string FILE, int LINE, string expected) { -267 if (!Passed) return false; -268 if (!Trace_stream) return false; -269 vector<string> expected_lines = split(expected, "^D"); -270 int curr_expected_line = 0; -271 while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty()) -272 ++curr_expected_line; -273 if (curr_expected_line == SIZE(expected_lines)) return true; -274 string label, contents; -275 split_label_contents(expected_lines.at(curr_expected_line), &label, &contents); -276 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { -277 if (label != p->label) continue; -278 if (contents != trim(p->contents)) continue; +217 bool trace_contains_errors() { +218 return Trace_errors > 0 || trace_count("error") > 0; +219 } +220 +221 :(before "End Types") +222 struct end {}; +223 :(code) +224 ostream& operator<<(ostream& os, end /*unused*/) { +225 if (Trace_stream) Trace_stream->newline(); +226 return os; +227 } +228 +229 :(before "End Globals") +230 bool Save_trace = false; // if set, write out trace to disk +231 +232 // Trace_stream is a resource, lease_tracer uses RAII to manage it. +233 :(before "End Types") +234 struct lease_tracer { +235 lease_tracer(); +236 ~lease_tracer(); +237 }; +238 :(code) +239 lease_tracer::lease_tracer() { Trace_stream = new trace_stream; } +240 lease_tracer::~lease_tracer() { +241 if (Save_trace) Trace_stream->save(); +242 delete Trace_stream, Trace_stream = NULL; +243 } +244 :(before "End Includes") +245 #define START_TRACING_UNTIL_END_OF_SCOPE lease_tracer leased_tracer; +246 :(before "End Test Setup") +247 START_TRACING_UNTIL_END_OF_SCOPE +248 +249 :(before "End Includes") +250 #define CHECK_TRACE_CONTENTS(...) 
check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__) +251 +252 #define CHECK_TRACE_CONTAINS_ERRORS() CHECK(trace_contains_errors()) +253 #define CHECK_TRACE_DOESNT_CONTAIN_ERRORS() \ +254 if (Passed && trace_contains_errors()) { \ +255 cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected errors\n"; \ +256 DUMP("error"); \ +257 Passed = false; \ +258 return; \ +259 } +260 +261 #define CHECK_TRACE_COUNT(label, count) \ +262 if (Passed && trace_count(label) != (count)) { \ +263 cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): trace_count of " << label << " should be " << count << '\n'; \ +264 cerr << " got " << trace_count(label) << '\n'; /* multiple eval */ \ +265 DUMP(label); \ +266 Passed = false; \ +267 return; /* Currently we stop at the very first failure. */ \ +268 } +269 +270 #define CHECK_TRACE_DOESNT_CONTAIN(...) CHECK(trace_doesnt_contain(__VA_ARGS__)) +271 +272 :(code) +273 bool check_trace_contents(string FUNCTION, string FILE, int LINE, string expected) { +274 if (!Passed) return false; +275 if (!Trace_stream) return false; +276 vector<string> expected_lines = split(expected, "^D"); +277 int curr_expected_line = 0; +278 while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty()) 279 ++curr_expected_line; -280 while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty()) -281 ++curr_expected_line; -282 if (curr_expected_line == SIZE(expected_lines)) return true; -283 split_label_contents(expected_lines.at(curr_expected_line), &label, &contents); -284 } -285 -286 if (line_exists_anywhere(label, contents)) { -287 cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): line [" << label << ": " << contents << "] out of order in trace:\n"; -288 DUMP(""); -289 } -290 else { -291 cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << contents << "] in trace:\n"; -292 
DUMP(label); -293 } -294 Passed = false; -295 return false; -296 } -297 -298 void split_label_contents(const string& s, string* label, string* contents) { -299 static const string delim(": "); -300 size_t pos = s.find(delim); -301 if (pos == string::npos) { -302 *label = ""; -303 *contents = trim(s); -304 } -305 else { -306 *label = trim(s.substr(0, pos)); -307 *contents = trim(s.substr(pos+SIZE(delim))); -308 } -309 } -310 -311 bool line_exists_anywhere(const string& label, const string& contents) { -312 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { -313 if (label != p->label) continue; -314 if (contents == trim(p->contents)) return true; +280 if (curr_expected_line == SIZE(expected_lines)) return true; +281 string label, contents; +282 split_label_contents(expected_lines.at(curr_expected_line), &label, &contents); +283 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { +284 if (label != p->label) continue; +285 if (contents != trim(p->contents)) continue; +286 ++curr_expected_line; +287 while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty()) +288 ++curr_expected_line; +289 if (curr_expected_line == SIZE(expected_lines)) return true; +290 split_label_contents(expected_lines.at(curr_expected_line), &label, &contents); +291 } +292 +293 if (line_exists_anywhere(label, contents)) { +294 cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): line [" << label << ": " << contents << "] out of order in trace:\n"; +295 DUMP(""); +296 } +297 else { +298 cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << contents << "] in trace:\n"; +299 DUMP(label); +300 } +301 Passed = false; +302 return false; +303 } +304 +305 void split_label_contents(const string& s, string* label, string* contents) { +306 static const string delim(": "); +307 size_t pos = s.find(delim); 
+308 if (pos == string::npos) { +309 *label = ""; +310 *contents = trim(s); +311 } +312 else { +313 *label = trim(s.substr(0, pos)); +314 *contents = trim(s.substr(pos+SIZE(delim))); 315 } -316 return false; -317 } -318 -319 int trace_count(string label) { -320 return trace_count(label, ""); -321 } -322 -323 int trace_count(string label, string line) { -324 if (!Trace_stream) return 0; -325 long result = 0; -326 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { -327 if (label == p->label) { -328 if (line == "" || trim(line) == trim(p->contents)) -329 ++result; -330 } -331 } -332 return result; -333 } -334 -335 int trace_count_prefix(string label, string prefix) { -336 if (!Trace_stream) return 0; -337 long result = 0; -338 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { -339 if (label == p->label) { -340 if (starts_with(trim(p->contents), trim(prefix))) -341 ++result; -342 } -343 } -344 return result; -345 } -346 -347 bool trace_doesnt_contain(string label, string line) { -348 return trace_count(label, line) == 0; -349 } -350 -351 bool trace_doesnt_contain(string expected) { -352 vector<string> tmp = split_first(expected, ": "); -353 return trace_doesnt_contain(tmp.at(0), tmp.at(1)); -354 } -355 -356 vector<string> split(string s, string delim) { -357 vector<string> result; -358 size_t begin=0, end=s.find(delim); -359 while (true) { -360 if (end == string::npos) { -361 result.push_back(string(s, begin, string::npos)); -362 break; -363 } -364 result.push_back(string(s, begin, end-begin)); -365 begin = end+SIZE(delim); -366 end = s.find(delim, begin); -367 } -368 return result; -369 } -370 -371 vector<string> split_first(string s, string delim) { -372 vector<string> result; -373 size_t end=s.find(delim); -374 result.push_back(string(s, 0, end)); -375 if (end != string::npos) -376 result.push_back(string(s, end+SIZE(delim), 
string::npos)); -377 return result; -378 } -379 -380 string trim(const string& s) { -381 string::const_iterator first = s.begin(); -382 while (first != s.end() && isspace(*first)) -383 ++first; -384 if (first == s.end()) return ""; -385 -386 string::const_iterator last = --s.end(); -387 while (last != s.begin() && isspace(*last)) -388 --last; -389 ++last; -390 return string(first, last); -391 } -392 -393 :(before "End Includes") -394 #include <vector> -395 using std::vector; -396 #include <list> -397 using std::list; -398 #include <set> -399 using std::set; -400 -401 #include <sstream> -402 using std::istringstream; -403 using std::ostringstream; -404 -405 #include <fstream> -406 using std::ifstream; -407 using std::ofstream; -408 -409 :(before "End Globals") -410 //: In future layers we'll use the depth field as follows: -411 //: -412 //: Errors will be depth 0. -413 //: Mu 'applications' will be able to use depths 1-100 as they like. -414 //: Primitive statements will occupy 101-9989 -415 extern const int Initial_callstack_depth = 101; -416 extern const int Max_callstack_depth = 9989; -417 //: Finally, details of primitive Mu statements will occupy depth 9990-9999 -418 //: (more on that later as well) -419 //: -420 //: This framework should help us hide some details at each level, mixing -421 //: static ideas like layers with the dynamic notion of call-stack depth. 
+316 } +317 +318 bool line_exists_anywhere(const string& label, const string& contents) { +319 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { +320 if (label != p->label) continue; +321 if (contents == trim(p->contents)) return true; +322 } +323 return false; +324 } +325 +326 int trace_count(string label) { +327 return trace_count(label, ""); +328 } +329 +330 int trace_count(string label, string line) { +331 if (!Trace_stream) return 0; +332 long result = 0; +333 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { +334 if (label == p->label) { +335 if (line == "" || trim(line) == trim(p->contents)) +336 ++result; +337 } +338 } +339 return result; +340 } +341 +342 int trace_count_prefix(string label, string prefix) { +343 if (!Trace_stream) return 0; +344 long result = 0; +345 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { +346 if (label == p->label) { +347 if (starts_with(trim(p->contents), trim(prefix))) +348 ++result; +349 } +350 } +351 return result; +352 } +353 +354 bool trace_doesnt_contain(string label, string line) { +355 return trace_count(label, line) == 0; +356 } +357 +358 bool trace_doesnt_contain(string expected) { +359 vector<string> tmp = split_first(expected, ": "); +360 if (SIZE(tmp) == 1) { +361 raise << expected << ": missing label or contents in trace line\n" << end(); +362 assert(false); +363 } +364 return trace_doesnt_contain(tmp.at(0), tmp.at(1)); +365 } +366 +367 vector<string> split(string s, string delim) { +368 vector<string> result; +369 size_t begin=0, end=s.find(delim); +370 while (true) { +371 if (end == string::npos) { +372 result.push_back(string(s, begin, string::npos)); +373 break; +374 } +375 result.push_back(string(s, begin, end-begin)); +376 begin = end+SIZE(delim); +377 end = s.find(delim, begin); +378 } +379 return result; +380 } 
+381 +382 vector<string> split_first(string s, string delim) { +383 vector<string> result; +384 size_t end=s.find(delim); +385 result.push_back(string(s, 0, end)); +386 if (end != string::npos) +387 result.push_back(string(s, end+SIZE(delim), string::npos)); +388 return result; +389 } +390 +391 string trim(const string& s) { +392 string::const_iterator first = s.begin(); +393 while (first != s.end() && isspace(*first)) +394 ++first; +395 if (first == s.end()) return ""; +396 +397 string::const_iterator last = --s.end(); +398 while (last != s.begin() && isspace(*last)) +399 --last; +400 ++last; +401 return string(first, last); +402 } +403 +404 :(before "End Includes") +405 #include <vector> +406 using std::vector; +407 #include <list> +408 using std::list; +409 #include <set> +410 using std::set; +411 +412 #include <sstream> +413 using std::istringstream; +414 using std::ostringstream; +415 +416 #include <fstream> +417 using std::ifstream; +418 using std::ofstream; diff --git a/html/subx/003trace.test.cc.html b/html/subx/003trace.test.cc.html index 094ca159..77156ad1 100644 --- a/html/subx/003trace.test.cc.html +++ b/html/subx/003trace.test.cc.html @@ -57,66 +57,66 @@ if ('onhashchange' in window) {

   1 void test_trace_check_compares() {
-  2   trace("test layer") << "foo" << end();
-  3   CHECK_TRACE_CONTENTS("test layer: foo");
+  2   trace("test layer") << "foo" << end();
+  3   CHECK_TRACE_CONTENTS("test layer: foo");
   4 }
   5 
   6 void test_trace_check_ignores_other_layers() {
-  7   trace("test layer 1") << "foo" << end();
-  8   trace("test layer 2") << "bar" << end();
-  9   CHECK_TRACE_CONTENTS("test layer 1: foo");
- 10   CHECK_TRACE_DOESNT_CONTAIN("test layer 2: foo");
+  7   trace("test layer 1") << "foo" << end();
+  8   trace("test layer 2") << "bar" << end();
+  9   CHECK_TRACE_CONTENTS("test layer 1: foo");
+ 10   CHECK_TRACE_DOESNT_CONTAIN("test layer 2: foo");
  11 }
  12 
  13 void test_trace_check_ignores_leading_whitespace() {
- 14   trace("test layer 1") << " foo" << end();
+ 14   trace("test layer 1") << " foo" << end();
  15   CHECK_EQ(trace_count("test layer 1", /*too little whitespace*/"foo"), 1);
  16   CHECK_EQ(trace_count("test layer 1", /*too much whitespace*/"  foo"), 1);
  17 }
  18 
  19 void test_trace_check_ignores_other_lines() {
- 20   trace("test layer 1") << "foo" << end();
- 21   trace("test layer 1") << "bar" << end();
- 22   CHECK_TRACE_CONTENTS("test layer 1: foo");
+ 20   trace("test layer 1") << "foo" << end();
+ 21   trace("test layer 1") << "bar" << end();
+ 22   CHECK_TRACE_CONTENTS("test layer 1: foo");
  23 }
  24 
  25 void test_trace_check_ignores_other_lines2() {
- 26   trace("test layer 1") << "foo" << end();
- 27   trace("test layer 1") << "bar" << end();
- 28   CHECK_TRACE_CONTENTS("test layer 1: bar");
+ 26   trace("test layer 1") << "foo" << end();
+ 27   trace("test layer 1") << "bar" << end();
+ 28   CHECK_TRACE_CONTENTS("test layer 1: bar");
  29 }
  30 
  31 void test_trace_ignores_trailing_whitespace() {
- 32   trace("test layer 1") << "foo\n" << end();
- 33   CHECK_TRACE_CONTENTS("test layer 1: foo");
+ 32   trace("test layer 1") << "foo\n" << end();
+ 33   CHECK_TRACE_CONTENTS("test layer 1: foo");
  34 }
  35 
  36 void test_trace_ignores_trailing_whitespace2() {
- 37   trace("test layer 1") << "foo " << end();
- 38   CHECK_TRACE_CONTENTS("test layer 1: foo");
+ 37   trace("test layer 1") << "foo " << end();
+ 38   CHECK_TRACE_CONTENTS("test layer 1: foo");
  39 }
  40 
  41 void test_trace_orders_across_layers() {
- 42   trace("test layer 1") << "foo" << end();
- 43   trace("test layer 2") << "bar" << end();
- 44   trace("test layer 1") << "qux" << end();
- 45   CHECK_TRACE_CONTENTS("test layer 1: foo^Dtest layer 2: bar^Dtest layer 1: qux^D");
+ 42   trace("test layer 1") << "foo" << end();
+ 43   trace("test layer 2") << "bar" << end();
+ 44   trace("test layer 1") << "qux" << end();
+ 45   CHECK_TRACE_CONTENTS("test layer 1: foo^Dtest layer 2: bar^Dtest layer 1: qux^D");
  46 }
  47 
  48 void test_trace_supports_count() {
- 49   trace("test layer 1") << "foo" << end();
- 50   trace("test layer 1") << "foo" << end();
+ 49   trace("test layer 1") << "foo" << end();
+ 50   trace("test layer 1") << "foo" << end();
  51   CHECK_EQ(trace_count("test layer 1", "foo"), 2);
  52 }
  53 
  54 void test_trace_supports_count2() {
- 55   trace("test layer 1") << "foo" << end();
- 56   trace("test layer 1") << "bar" << end();
+ 55   trace("test layer 1") << "foo" << end();
+ 56   trace("test layer 1") << "bar" << end();
  57   CHECK_EQ(trace_count("test layer 1"), 2);
  58 }
  59 
  60 void test_trace_count_ignores_trailing_whitespace() {
- 61   trace("test layer 1") << "foo\n" << end();
+ 61   trace("test layer 1") << "foo\n" << end();
  62   CHECK_EQ(trace_count("test layer 1", "foo"), 1);
  63 }
  64 
@@ -124,62 +124,64 @@ if ('onhashchange' in window) {
  66 // pending: readable_contents() adds newline if necessary.
  67 // pending: raise also prints to stderr.
  68 // pending: raise doesn't print to stderr if Hide_errors is set.
- 69 // pending: raise doesn't have to be saved if Hide_errors is set, just printed.
- 70 // pending: raise prints to stderr if Trace_stream is NULL.
- 71 // pending: raise prints to stderr if Trace_stream is NULL even if Hide_errors is set.
- 72 
- 73 // can't check trace because trace methods call 'split'
+ 69 // pending: warn doesn't print to stderr if Hide_errors is set.
+ 70 // pending: warn doesn't print to stderr if Hide_warnings is set.
+ 71 // pending: raise doesn't have to be saved if Hide_errors is set, just printed.
+ 72 // pending: raise prints to stderr if Trace_stream is NULL.
+ 73 // pending: raise prints to stderr if Trace_stream is NULL even if Hide_errors is set.
  74 
- 75 void test_split_returns_at_least_one_elem() {
- 76   vector<string> result = split("", ",");
- 77   CHECK_EQ(result.size(), 1);
- 78   CHECK_EQ(result.at(0), "");
- 79 }
- 80 
- 81 void test_split_returns_entire_input_when_no_delim() {
- 82   vector<string> result = split("abc", ",");
- 83   CHECK_EQ(result.size(), 1);
- 84   CHECK_EQ(result.at(0), "abc");
- 85 }
- 86 
- 87 void test_split_works() {
- 88   vector<string> result = split("abc,def", ",");
- 89   CHECK_EQ(result.size(), 2);
- 90   CHECK_EQ(result.at(0), "abc");
- 91   CHECK_EQ(result.at(1), "def");
- 92 }
- 93 
- 94 void test_split_works2() {
- 95   vector<string> result = split("abc,def,ghi", ",");
- 96   CHECK_EQ(result.size(), 3);
- 97   CHECK_EQ(result.at(0), "abc");
- 98   CHECK_EQ(result.at(1), "def");
- 99   CHECK_EQ(result.at(2), "ghi");
-100 }
-101 
-102 void test_split_handles_multichar_delim() {
-103   vector<string> result = split("abc,,def,,ghi", ",,");
-104   CHECK_EQ(result.size(), 3);
-105   CHECK_EQ(result.at(0), "abc");
-106   CHECK_EQ(result.at(1), "def");
-107   CHECK_EQ(result.at(2), "ghi");
-108 }
-109 
-110 void test_trim() {
-111   CHECK_EQ(trim(""), "");
-112   CHECK_EQ(trim(" "), "");
-113   CHECK_EQ(trim("  "), "");
-114   CHECK_EQ(trim("a"), "a");
-115   CHECK_EQ(trim(" a"), "a");
-116   CHECK_EQ(trim("  a"), "a");
-117   CHECK_EQ(trim("  ab"), "ab");
-118   CHECK_EQ(trim("a "), "a");
-119   CHECK_EQ(trim("a  "), "a");
-120   CHECK_EQ(trim("ab  "), "ab");
-121   CHECK_EQ(trim(" a "), "a");
-122   CHECK_EQ(trim("  a  "), "a");
-123   CHECK_EQ(trim("  ab  "), "ab");
-124 }
+ 75 // can't check trace because trace methods call 'split'
+ 76 
+ 77 void test_split_returns_at_least_one_elem() {
+ 78   vector<string> result = split("", ",");
+ 79   CHECK_EQ(result.size(), 1);
+ 80   CHECK_EQ(result.at(0), "");
+ 81 }
+ 82 
+ 83 void test_split_returns_entire_input_when_no_delim() {
+ 84   vector<string> result = split("abc", ",");
+ 85   CHECK_EQ(result.size(), 1);
+ 86   CHECK_EQ(result.at(0), "abc");
+ 87 }
+ 88 
+ 89 void test_split_works() {
+ 90   vector<string> result = split("abc,def", ",");
+ 91   CHECK_EQ(result.size(), 2);
+ 92   CHECK_EQ(result.at(0), "abc");
+ 93   CHECK_EQ(result.at(1), "def");
+ 94 }
+ 95 
+ 96 void test_split_works2() {
+ 97   vector<string> result = split("abc,def,ghi", ",");
+ 98   CHECK_EQ(result.size(), 3);
+ 99   CHECK_EQ(result.at(0), "abc");
+100   CHECK_EQ(result.at(1), "def");
+101   CHECK_EQ(result.at(2), "ghi");
+102 }
+103 
+104 void test_split_handles_multichar_delim() {
+105   vector<string> result = split("abc,,def,,ghi", ",,");
+106   CHECK_EQ(result.size(), 3);
+107   CHECK_EQ(result.at(0), "abc");
+108   CHECK_EQ(result.at(1), "def");
+109   CHECK_EQ(result.at(2), "ghi");
+110 }
+111 
+112 void test_trim() {
+113   CHECK_EQ(trim(""), "");
+114   CHECK_EQ(trim(" "), "");
+115   CHECK_EQ(trim("  "), "");
+116   CHECK_EQ(trim("a"), "a");
+117   CHECK_EQ(trim(" a"), "a");
+118   CHECK_EQ(trim("  a"), "a");
+119   CHECK_EQ(trim("  ab"), "ab");
+120   CHECK_EQ(trim("a "), "a");
+121   CHECK_EQ(trim("a  "), "a");
+122   CHECK_EQ(trim("ab  "), "ab");
+123   CHECK_EQ(trim(" a "), "a");
+124   CHECK_EQ(trim("  a  "), "a");
+125   CHECK_EQ(trim("  ab  "), "ab");
+126 }

diff --git a/html/subx/010---vm.cc.html b/html/subx/010---vm.cc.html new file mode 100644 index 00000000..06951ccb --- /dev/null +++ b/html/subx/010---vm.cc.html @@ -0,0 +1,328 @@ + + + + +Mu - subx/010---vm.cc + + + + + + + + + + +

+  1 //: Core data structures for simulating the SubX VM (subset of an x86 processor)
+  2 //:
+  3 //: At the lowest level ("level 1") of abstraction, SubX executes x86
+  4 //: instructions provided in the form of an array of bytes, loaded into memory
+  5 //: starting at a specific address.
+  6 
+  7 //:: registers
+  8 //: assume segment registers are hard-coded to 0
+  9 //: no floating-point, MMX, etc. yet
+ 10 
+ 11 :(before "End Types")
// Names for the integer registers. Each value is that register's index into
// the global register array; NUM_INT_REGISTERS is the number of registers,
// not a register itself.
enum {
  EAX = 0,
  ECX = 1,
  EDX = 2,
  EBX = 3,
  ESP = 4,
  EBP = 5,
  ESI = 6,
  EDI = 7,
  NUM_INT_REGISTERS = 8,
};
// A single 32-bit register, readable either as a signed or as an unsigned
// integer.
union reg {
  int32_t i;
  uint32_t u;
};
+ 27 :(before "End Globals")
+ 28 reg Reg[NUM_INT_REGISTERS] = { {0} };  // one slot per integer register, indexed by EAX..EDI; all start at zero
+ 29 uint32_t EIP = 1;  // address of the next instruction byte to fetch; starts at 1 to preserve null pointer
+ 30 :(before "End Reset")
+ 31 bzero(Reg, sizeof(Reg));  // zero every integer register again on reset
+ 32 EIP = 1;  // preserve null pointer
+ 33 
+ 34 :(before "End Help Contents")
+ 35 cerr << "  registers\n";
+ 36 :(before "End Help Texts")
+ 37 put(Help, "registers",
+ 38   "SubX currently supports eight 32-bit integer registers: R0 to R7.\n"
+ 39   "R4 (ESP) contains the top of the stack.\n"
+ 40   "\n"
+ 41   "There's also a register for the address of the currently executing\n"
+ 42   "instruction. It is modified by jumps.\n"
+ 43   "\n"
+ 44   "Various instructions modify one or more of three 1-bit 'flag' registers,\n"
+ 45   "as a side-effect:\n"
+ 46   "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
+ 47   "  reset if not.\n"
+ 48   "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
+ 49   "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
+ 50   "The flag bits are read by conditional jumps.\n"
+ 51   "\n"
+ 52   "We don't support non-integer (floating-point) registers yet.\n"
+ 53 );
+ 54 
+ 55 :(before "End Globals")
+ 56 // the subset of x86 flag registers we care about
+ 57 bool SF = false;  // sign flag
+ 58 bool ZF = false;  // zero flag
+ 59 bool OF = false;  // overflow flag
+ 60 :(before "End Reset")
+ 61 SF = ZF = OF = false;
+ 62 
+ 63 //: how the flag registers are updated after each instruction
+ 64 
+ 65 :(before "End Includes")
// Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
// result in 'arg1', then update flags.
//
// 'arg1' must be a modifiable signed 32-bit lvalue and 'arg2' a signed 32-bit
// value.
// beware: both args are expanded more than once, so no side-effects in args
//
// The operation is carried out in 64 bits and only then truncated to 32.
// Computing 'arg1 op arg2' in 32 bits first would be undefined on signed
// overflow, and the wide and narrow results would always agree, so OF could
// never be set reliably.
#define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
  /* arg1 and arg2 must be signed */ \
  const int64_t tmp = static_cast<int64_t>(arg1) op static_cast<int64_t>(arg2); \
  /* keep the low 32 bits (two's-complement wraparound on mainstream compilers) */ \
  arg1 = static_cast<int32_t>(tmp); \
  trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  SF = (arg1 < 0); \
  ZF = (arg1 == 0); \
  /* overflow iff the exact result doesn't fit in 32 bits */ \
  OF = (arg1 != tmp); \
}
+ 78 
// Apply bitwise operation 'op' to 'arg1' and 'arg2', leaving the result in
// 'arg1', then refresh the flags: OF is always cleared, ZF reports a zero
// result and SF mirrors bit 31 of the result.
// beware: both args are expanded more than once, so no side-effects in args
#define BINARY_BITWISE_OP(op, arg1, arg2) { \
  /* arg1 and arg2 must be unsigned */ \
  arg1 = arg1 op arg2; \
  trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  OF = false; \
  ZF = (arg1 == 0); \
  SF = ((arg1 >> 31) != 0); \
}
+ 89 
+ 90 //:: simulated RAM
+ 91 
+ 92 :(before "End Globals")
+ 93 vector<uint8_t> Mem;  // simulated RAM, one element per byte
+ 94 uint32_t Mem_offset = 0;  // the memory helpers subtract this from an address before indexing into Mem
+ 95 uint32_t End_of_program = 0;  // hlt sets EIP to this value
+ 96 :(before "End Reset")
+ 97 Mem.clear();
+ 98 Mem.resize(1024);  // after clear(), this leaves exactly 1024 zeroed bytes
+ 99 Mem_offset = 0;
+100 End_of_program = 0;
+101 :(code)
+102 // These helpers depend on Mem being laid out contiguously (so you can't use a
+103 // map, etc.) and on the host also being little-endian.
+104 inline uint8_t read_mem_u8(uint32_t addr) {
+105   return Mem.at(addr-Mem_offset);
+106 }
+107 inline int8_t read_mem_i8(uint32_t addr) {
+108   return static_cast<int8_t>(Mem.at(addr-Mem_offset));
+109 }
+110 inline uint32_t read_mem_u32(uint32_t addr) {
+111   return *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
+112 }
+113 inline int32_t read_mem_i32(uint32_t addr) {
+114   return *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
+115 }
+116 
+117 inline uint8_t* mem_addr_u8(uint32_t addr) {
+118   return &Mem.at(addr-Mem_offset);
+119 }
+120 inline int8_t* mem_addr_i8(uint32_t addr) {
+121   return reinterpret_cast<int8_t*>(&Mem.at(addr-Mem_offset));
+122 }
+123 inline char* mem_addr_string(uint32_t addr) {
+124   return reinterpret_cast<char*>(&Mem.at(addr-Mem_offset));
+125 }
+126 inline uint32_t* mem_addr_u32(uint32_t addr) {
+127   return reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
+128 }
+129 inline int32_t* mem_addr_i32(uint32_t addr) {
+130   return reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
+131 }
+132 
+133 inline void write_mem_u8(uint32_t addr, uint8_t val) {
+134   Mem.at(addr-Mem_offset) = val;
+135 }
+136 inline void write_mem_i8(uint32_t addr, int8_t val) {
+137   Mem.at(addr-Mem_offset) = static_cast<uint8_t>(val);
+138 }
+139 inline void write_mem_u32(uint32_t addr, uint32_t val) {
+140   *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset)) = val;
+141 }
+142 inline void write_mem_i32(uint32_t addr, int32_t val) {
+143   *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset)) = val;
+144 }
+145 
+146 //:: core interpreter loop
+147 
+148 :(code)
+149 // skeleton of how x86 instructions are decoded
+150 void run_one_instruction() {  // decode and execute the one instruction at EIP; next() advances EIP past each byte consumed
+151   uint8_t op=0, op2=0, op3=0;
+152   trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();  // log the instruction's address before fetching it
+153 //?   dump_registers();
+154 //?   cerr << "inst: 0x" << EIP << " => ";
+155   op = next();
+156 //?   cerr << HEXBYTE << NUM(op) << '\n';
+157   switch (op) {
+158   case 0xf4:  // hlt
+159     EIP = End_of_program;  // run() loops only while EIP < End_of_program, so this stops it
+160     break;
+161   // End Single-Byte Opcodes
+162   case 0x0f:  // not an opcode by itself; the next byte selects the instruction
+163     switch(op2 = next()) {
+164     // End Two-Byte Opcodes Starting With 0f
+165     default:
+166       cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
+167       DUMP("");
+168       exit(1);  // unknown opcodes are fatal
+169     }
+170     break;
+171   case 0xf2:  // followed by a second opcode byte, or by 0f and then a third
+172     switch(op2 = next()) {
+173     // End Two-Byte Opcodes Starting With f2
+174     case 0x0f:
+175       switch(op3 = next()) {
+176       // End Three-Byte Opcodes Starting With f2 0f
+177       default:
+178         cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
+179         DUMP("");
+180         exit(1);
+181       }
+182       break;
+183     default:
+184       cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
+185       DUMP("");
+186       exit(1);
+187     }
+188     break;
+189   case 0xf3:  // followed by a second opcode byte, or by 0f and then a third
+190     switch(op2 = next()) {
+191     // End Two-Byte Opcodes Starting With f3
+192     case 0x0f:
+193       switch(op3 = next()) {
+194       // End Three-Byte Opcodes Starting With f3 0f
+195       default:
+196         cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
+197         DUMP("");
+198         exit(1);
+199       }
+200       break;
+201     default:
+202       cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
+203       DUMP("");
+204       exit(1);
+205     }
+206     break;
+207   default:
+208     cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
+209     DUMP("");
+210     exit(1);
+211   }
+212 }
+213 
+214 inline uint8_t next() {
+215   return read_mem_u8(EIP++);
+216 }
+217 
+218 void dump_registers() {
+219   for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
+220     if (i > 0) cerr << "; ";
+221     cerr << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
+222   }
+223   cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
+224 }
+225 
+226 //: start tracking supported opcodes
+227 :(before "End Globals")
+228 map</*op*/string, string> name;  // opcode as a hex string (e.g. "f4") -> human-readable description
+229 map</*op*/string, string> name_0f;  // listed under the "0f" prefix by the 'opcodes' help
+230 map</*op*/string, string> name_f3;  // listed under the "f3" prefix by the 'opcodes' help
+231 map</*op*/string, string> name_f3_0f;  // listed under the "f3 0f" prefix by the 'opcodes' help
+232 :(before "End One-time Setup")
+233 init_op_names();  // fill in the tables above
+234 :(code)
+235 void init_op_names() {  // record a human-readable description for each supported opcode
+236   put(name, "f4", "halt");
+237   // End Initialize Op Names(name)
+238 }
+239 
+240 :(before "End Help Special-cases(key)")
+241 if (key == "opcodes") {
+242   cerr << "Opcodes currently supported by SubX:\n";
+243   for (map<string, string>::iterator p = name.begin();  p != name.end();  ++p)
+244     cerr << "  " << p->first << ": " << p->second << '\n';
+245   for (map<string, string>::iterator p = name_0f.begin();  p != name_0f.end();  ++p)
+246     cerr << "  0f " << p->first << ": " << p->second << '\n';
+247   for (map<string, string>::iterator p = name_f3.begin();  p != name_f3.end();  ++p)
+248     cerr << "  f3 " << p->first << ": " << p->second << '\n';
+249   for (map<string, string>::iterator p = name_f3_0f.begin();  p != name_f3_0f.end();  ++p)
+250     cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
+251   cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
+252   return 0;
+253 }
+254 :(before "End Help Contents")
+255 cerr << "  opcodes\n";
+256 
+257 :(before "End Includes")
+258 #include <iomanip>
+259 #define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
+260 #define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
+261 // ugly that iostream doesn't print uint8_t as an integer
+262 #define NUM(X) static_cast<int>(X)
+263 #include <stdint.h>
+

+ + + diff --git a/html/subx/010vm.cc.html b/html/subx/010vm.cc.html deleted file mode 100644 index 1e5d5d87..00000000 --- a/html/subx/010vm.cc.html +++ /dev/null @@ -1,307 +0,0 @@ - - - - -Mu - subx/010vm.cc - - - - - - - - - - -

-  1 //: Core data structures for simulating the SubX VM (subset of an x86 processor)
-  2 //:
-  3 //: At the lowest level ("level 1") of abstraction, SubX executes x86
-  4 //: instructions provided in the form of an array of bytes, loaded into memory
-  5 //: starting at a specific address.
-  6 
-  7 //:: registers
-  8 //: assume segment registers are hard-coded to 0
-  9 //: no floating-point, MMX, etc. yet
- 10 
- 11 :(before "End Types")
- 12 enum {
- 13   EAX,
- 14   ECX,
- 15   EDX,
- 16   EBX,
- 17   ESP,
- 18   EBP,
- 19   ESI,
- 20   EDI,
- 21   NUM_INT_REGISTERS,
- 22 };
- 23 union reg {
- 24   int32_t i;
- 25   uint32_t u;
- 26 };
- 27 :(before "End Globals")
- 28 reg Reg[NUM_INT_REGISTERS] = { {0} };
- 29 uint32_t EIP = 1;  // preserve null pointer
- 30 :(before "End Reset")
- 31 bzero(Reg, sizeof(Reg));
- 32 EIP = 1;  // preserve null pointer
- 33 
- 34 :(before "End Help Contents")
- 35 cerr << "  registers\n";
- 36 :(before "End Help Texts")
- 37 put(Help, "registers",
- 38   "SubX currently supports eight 32-bit integer registers: R0 to R7.\n"
- 39   "R4 (ESP) contains the top of the stack.\n"
- 40   "\n"
- 41   "There's also a register for the address of the currently executing\n"
- 42   "instruction. It is modified by jumps.\n"
- 43   "\n"
- 44   "Various instructions modify one or more of three 1-bit 'flag' registers,\n"
- 45   "as a side-effect:\n"
- 46   "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
- 47   "  reset if not.\n"
- 48   "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
- 49   "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
- 50   "The flag bits are read by conditional jumps.\n"
- 51   "\n"
- 52   "We don't support non-integer (floating-point) registers yet.\n"
- 53 );
- 54 
- 55 :(before "End Globals")
- 56 // the subset of x86 flag registers we care about
- 57 bool SF = false;  // sign flag
- 58 bool ZF = false;  // zero flag
- 59 bool OF = false;  // overflow flag
- 60 :(before "End Reset")
- 61 SF = ZF = OF = false;
- 62 
- 63 //: how the flag registers are updated after each instruction
- 64 
- 65 :(before "End Includes")
- 66 // Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
- 67 // result in 'arg1', then update flags.
- 68 // beware: no side-effects in args
- 69 #define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
- 70   /* arg1 and arg2 must be signed */ \
- 71   int64_t tmp = arg1 op arg2; \
- 72   arg1 = arg1 op arg2; \
- 73   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
- 74   SF = (arg1 < 0); \
- 75   ZF = (arg1 == 0); \
- 76   OF = (arg1 != tmp); \
- 77 }
- 78 
- 79 // Combine 'arg1' and 'arg2' with bitwise operation 'op' and store the result
- 80 // in 'arg1', then update flags.
- 81 #define BINARY_BITWISE_OP(op, arg1, arg2) { \
- 82   /* arg1 and arg2 must be unsigned */ \
- 83   arg1 = arg1 op arg2; \
- 84   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
- 85   SF = (arg1 >> 31); \
- 86   ZF = (arg1 == 0); \
- 87   OF = false; \
- 88 }
- 89 
- 90 //:: simulated RAM
- 91 
- 92 :(before "End Globals")
- 93 vector<uint8_t> Mem;
- 94 uint32_t Mem_offset = 0;
- 95 uint32_t End_of_program = 0;
- 96 :(before "End Reset")
- 97 Mem.clear();
- 98 Mem.resize(1024);
- 99 Mem_offset = 0;
-100 End_of_program = 0;
-101 :(code)
-102 // These helpers depend on Mem being laid out contiguously (so you can't use a
-103 // map, etc.) and on the host also being little-endian.
-104 inline uint8_t read_mem_u8(uint32_t addr) {
-105   return Mem.at(addr-Mem_offset);
-106 }
-107 inline int8_t read_mem_i8(uint32_t addr) {
-108   return static_cast<int8_t>(Mem.at(addr-Mem_offset));
-109 }
-110 inline uint32_t read_mem_u32(uint32_t addr) {
-111   return *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
-112 }
-113 inline int32_t read_mem_i32(uint32_t addr) {
-114   return *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
-115 }
-116 
-117 inline uint8_t* mem_addr_u8(uint32_t addr) {
-118   return &Mem.at(addr-Mem_offset);
-119 }
-120 inline int8_t* mem_addr_i8(uint32_t addr) {
-121   return reinterpret_cast<int8_t*>(&Mem.at(addr-Mem_offset));
-122 }
-123 inline uint32_t* mem_addr_u32(uint32_t addr) {
-124   return reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
-125 }
-126 inline int32_t* mem_addr_i32(uint32_t addr) {
-127   return reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
-128 }
-129 
-130 inline void write_mem_u8(uint32_t addr, uint8_t val) {
-131   Mem.at(addr-Mem_offset) = val;
-132 }
-133 inline void write_mem_i8(uint32_t addr, int8_t val) {
-134   Mem.at(addr-Mem_offset) = static_cast<uint8_t>(val);
-135 }
-136 inline void write_mem_u32(uint32_t addr, uint32_t val) {
-137   *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset)) = val;
-138 }
-139 inline void write_mem_i32(uint32_t addr, int32_t val) {
-140   *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset)) = val;
-141 }
-142 
-143 //:: core interpreter loop
-144 
-145 :(code)
-146 // skeleton of how x86 instructions are decoded
-147 void run_one_instruction() {
-148   uint8_t op=0, op2=0, op3=0;
-149   trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
-150 //?   dump_registers();
-151 //?   cerr << "inst: 0x" << EIP << " => ";
-152   op = next();
-153 //?   cerr << HEXBYTE << NUM(op) << '\n';
-154   switch (op) {
-155   case 0xf4:  // hlt
-156     EIP = End_of_program;
-157     break;
-158   // End Single-Byte Opcodes
-159   case 0x0f:
-160     switch(op2 = next()) {
-161     // End Two-Byte Opcodes Starting With 0f
-162     default:
-163       cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
-164       DUMP("");
-165       exit(1);
-166     }
-167     break;
-168   case 0xf3:
-169     switch(op2 = next()) {
-170     // End Two-Byte Opcodes Starting With f3
-171     case 0x0f:
-172       switch(op3 = next()) {
-173       // End Three-Byte Opcodes Starting With f3 0f
-174       default:
-175         cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
-176         DUMP("");
-177         exit(1);
-178       }
-179       break;
-180     default:
-181       cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
-182       DUMP("");
-183       exit(1);
-184     }
-185     break;
-186   default:
-187     cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
-188     DUMP("");
-189     exit(1);
-190   }
-191 }
-192 
-193 inline uint8_t next() {
-194   return read_mem_u8(EIP++);
-195 }
-196 
-197 void dump_registers() {
-198   for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
-199     if (i > 0) cerr << "; ";
-200     cerr << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
-201   }
-202   cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
-203 }
-204 
-205 //: start tracking supported opcodes
-206 :(before "End Globals")
-207 map</*op*/string, string> name;
-208 map</*op*/string, string> name_0f;
-209 map</*op*/string, string> name_f3;
-210 map</*op*/string, string> name_f3_0f;
-211 :(before "End One-time Setup")
-212 init_op_names();
-213 :(code)
-214 void init_op_names() {
-215   put(name, "f4", "halt");
-216   // End Initialize Op Names(name)
-217 }
-218 
-219 :(before "End Help Special-cases(key)")
-220 if (key == "opcodes") {
-221   cerr << "Opcodes currently supported by SubX:\n";
-222   for (map<string, string>::iterator p = name.begin();  p != name.end();  ++p)
-223     cerr << "  " << p->first << ": " << p->second << '\n';
-224   for (map<string, string>::iterator p = name_0f.begin();  p != name_0f.end();  ++p)
-225     cerr << "  0f " << p->first << ": " << p->second << '\n';
-226   for (map<string, string>::iterator p = name_f3.begin();  p != name_f3.end();  ++p)
-227     cerr << "  f3 " << p->first << ": " << p->second << '\n';
-228   for (map<string, string>::iterator p = name_f3_0f.begin();  p != name_f3_0f.end();  ++p)
-229     cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
-230   cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
-231   return 0;
-232 }
-233 :(before "End Help Contents")
-234 cerr << "  opcodes\n";
-235 
-236 :(before "End Includes")
-237 #include <iomanip>
-238 #define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
-239 #define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
-240 // ugly that iostream doesn't print uint8_t as an integer
-241 #define NUM(X) static_cast<int>(X)
-242 #include <stdint.h>
-

- - - diff --git a/html/subx/011run.cc.html b/html/subx/011run.cc.html index 0248497c..653447d6 100644 --- a/html/subx/011run.cc.html +++ b/html/subx/011run.cc.html @@ -16,6 +16,7 @@ a { color:#eeeeee; text-decoration: none; } a:hover { text-decoration: underline; } * { font-size: 12pt; font-size: 1em; } .Constant { color: #00a0a0; } +.Special { color: #c00000; } .SalientComment { color: #00ffff; } .Comment { color: #9090ff; } .Comment a { color:#0000ee; text-decoration:underline; } @@ -71,7 +72,7 @@ if ('onhashchange' in window) { 9 "Line-endings are significant; each line should contain a single\n" 10 "instruction, macro or directive.\n" 11 "\n" - 12 "Comments start with the '#' character. It should be at the start of a word\n" + 12 "Comments start with the '#' character. It should be at the start of a word\n" 13 "(start of line, or following a space).\n" 14 "\n" 15 "Each segment starts with a header line: a '==' delimiter followed by the\n" @@ -133,7 +134,7 @@ if ('onhashchange' in window) { 71 +load: 0x00000003 -> 0b 72 +load: 0x00000004 -> 0c 73 +load: 0x00000005 -> 0d - 74 +run: add imm32 0x0d0c0b0a to reg EAX + 74 +run: add imm32 0x0d0c0b0a to reg EAX 75 +run: storing 0x0d0c0b0a 76 77 :(code) @@ -142,14 +143,14 @@ if ('onhashchange' in window) { 80 void run(const string& text_bytes) { 81 program p; 82 istringstream in(text_bytes); - 83 parse(in, p); - 84 if (trace_contains_errors()) return; // if any stage raises errors, stop immediately - 85 transform(p); - 86 if (trace_contains_errors()) return; + 83 parse(in, p); + 84 if (trace_contains_errors()) return; // if any stage raises errors, stop immediately + 85 transform(p); + 86 if (trace_contains_errors()) return; 87 load(p); - 88 if (trace_contains_errors()) return; - 89 while (EIP < End_of_program) - 90 run_one_instruction(); + 88 if (trace_contains_errors()) return; + 89 while (EIP < End_of_program) + 90 run_one_instruction(); 91 } 92 93 //:: core data structures @@ -172,70 +173,70 @@ if ('onhashchange' 
in window) { 110 struct line { 111 vector<word> words; 112 vector<string> metadata; -113 }; -114 :(before "struct line") -115 struct word { -116 string original; -117 string data; -118 vector<string> metadata; -119 }; -120 -121 //:: parse -122 -123 :(code) -124 void parse(istream& fin, program& out) { -125 vector<line> l; -126 trace(99, "parse") << "begin" << end(); -127 while (has_data(fin)) { -128 string line_data; -129 getline(fin, line_data); -130 trace(99, "parse") << "line: " << line_data << end(); -131 istringstream lin(line_data); -132 vector<word> w; -133 while (has_data(lin)) { -134 string word_data; -135 lin >> word_data; -136 if (word_data.empty()) continue; -137 if (word_data[0] == '#') break; // comment -138 if (word_data == ".") continue; // comment token -139 if (word_data == "==") { -140 if (!l.empty()) { -141 assert(!out.segments.empty()); -142 trace(99, "parse") << "flushing to segment" << end(); -143 out.segments.back().lines.swap(l); -144 } -145 segment s; -146 lin >> std::hex >> s.start; -147 trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); -148 out.segments.push_back(s); -149 // todo? 
-150 break; // skip rest of line -151 } -152 if (word_data[0] == ':') { -153 // todo: line metadata -154 break; -155 } -156 w.push_back(word()); -157 w.back().original = word_data; -158 istringstream win(word_data); -159 if (getline(win, w.back().data, '/')) { -160 string m; -161 while (getline(win, m, '/')) -162 w.back().metadata.push_back(m); -163 } -164 trace(99, "parse") << "new word: " << w.back().data << end(); -165 } -166 if (!w.empty()) { -167 l.push_back(line()); -168 l.back().words.swap(w); -169 } +113 string original; +114 }; +115 :(before "struct line") +116 struct word { +117 string original; +118 string data; +119 vector<string> metadata; +120 }; +121 +122 //:: parse +123 +124 :(code) +125 void parse(istream& fin, program& out) { +126 vector<line> l; +127 trace(99, "parse") << "begin" << end(); +128 while (has_data(fin)) { +129 string line_data; +130 line curr; +131 getline(fin, line_data); +132 curr.original = line_data; +133 trace(99, "parse") << "line: " << line_data << end(); +134 istringstream lin(line_data); +135 while (has_data(lin)) { +136 string word_data; +137 lin >> word_data; +138 if (word_data.empty()) continue; +139 if (word_data[0] == '#') break; // comment +140 if (word_data == ".") continue; // comment token +141 if (word_data == "==") { +142 if (!l.empty()) { +143 assert(!out.segments.empty()); +144 trace(99, "parse") << "flushing to segment" << end(); +145 out.segments.back().lines.swap(l); +146 } +147 segment s; +148 lin >> std::hex >> s.start; +149 trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); +150 out.segments.push_back(s); +151 // todo? 
+152 break; // skip rest of line +153 } +154 if (word_data[0] == ':') { +155 // todo: line metadata +156 break; +157 } +158 curr.words.push_back(word()); +159 curr.words.back().original = word_data; +160 istringstream win(word_data); +161 if (getline(win, curr.words.back().data, '/')) { +162 string m; +163 while (getline(win, m, '/')) +164 curr.words.back().metadata.push_back(m); +165 } +166 trace(99, "parse") << "new word: " << curr.words.back().data << end(); +167 } +168 if (!curr.words.empty()) +169 l.push_back(curr); 170 } 171 if (!l.empty()) { 172 assert(!out.segments.empty()); -173 trace(99, "parse") << "flushing to segment" << end(); +173 trace(99, "parse") << "flushing to segment" << end(); 174 out.segments.back().lines.swap(l); 175 } -176 trace(99, "parse") << "done" << end(); +176 trace(99, "parse") << "done" << end(); 177 } 178 179 //:: transform @@ -245,80 +246,119 @@ if ('onhashchange' in window) { 183 :(before "End Globals") 184 vector<transform_fn> Transform; 185 -186 void transform(program& p) { -187 trace(99, "transform") << "begin" << end(); +186 void transform(program& p) { +187 trace(99, "transform") << "begin" << end(); 188 for (int t = 0; t < SIZE(Transform); ++t) 189 (*Transform.at(t))(p); -190 trace(99, "transform") << "done" << end(); +190 trace(99, "transform") << "done" << end(); 191 } 192 193 //:: load 194 195 void load(const program& p) { -196 trace(99, "load") << "begin" << end(); +196 trace(99, "load") << "begin" << end(); 197 if (p.segments.empty()) { -198 raise << "no code to run\n" << end(); +198 raise << "no code to run\n" << end(); 199 return; 200 } 201 for (int i = 0; i < SIZE(p.segments); ++i) { 202 const segment& seg = p.segments.at(i); 203 uint32_t addr = seg.start; -204 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); +204 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); 205 for (int j = 0; j < SIZE(seg.lines); ++j) { 206 const line& l = 
seg.lines.at(j); 207 for (int k = 0; k < SIZE(l.words); ++k) { 208 const word& w = l.words.at(k); -209 uint8_t val = hex_byte(w.data); -210 if (trace_contains_errors()) return; -211 write_mem_u8(addr, val); -212 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); +209 uint8_t val = hex_byte(w.data); +210 if (trace_contains_errors()) return; +211 write_mem_u8(addr, val); +212 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); 213 ++addr; 214 } 215 } -216 if (i == 0) End_of_program = addr; +216 if (i == 0) End_of_program = addr; 217 } -218 EIP = p.segments.at(0).start; -219 trace(99, "load") << "done" << end(); +218 EIP = p.segments.at(0).start; +219 trace(99, "load") << "done" << end(); 220 } 221 222 uint8_t hex_byte(const string& s) { 223 istringstream in(s); 224 int result = 0; 225 in >> std::hex >> result; -226 if (!in) { -227 raise << "invalid hex " << s << '\n' << end(); +226 if (!in || !in.eof()) { +227 raise << "token '" << s << "' is not a hex byte\n" << end(); 228 return '\0'; 229 } -230 if (result > 0xff) { -231 raise << "invalid hex byte " << std::hex << result << '\n' << end(); +230 if (result > 0xff || result < -0x8f) { +231 raise << "token '" << s << "' is not a hex byte\n" << end(); 232 return '\0'; 233 } 234 return static_cast<uint8_t>(result); 235 } 236 -237 //:: run -238 -239 :(before "End Initialize Op Names(name)") -240 put(name, "05", "add imm32 to R0 (EAX)"); -241 -242 //: our first opcode -243 :(before "End Single-Byte Opcodes") -244 case 0x05: { // add imm32 to EAX -245 int32_t arg2 = imm32(); -246 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); -247 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); -248 break; -249 } -250 -251 :(code) -252 // read a 32-bit immediate in little-endian order from the instruction stream -253 int32_t imm32() { -254 int32_t result = next(); -255 result |= (next()<<8); -256 result |= 
(next()<<16); -257 result |= (next()<<24); -258 return result; -259 } +237 :(scenarios parse_and_load) +238 :(scenario number_too_large) +239 % Hide_errors = true; +240 == 0x1 +241 05 cab +242 +error: token 'cab' is not a hex byte +243 +244 :(scenario invalid_hex) +245 % Hide_errors = true; +246 == 0x1 +247 05 cx +248 +error: token 'cx' is not a hex byte +249 +250 :(scenario negative_number) +251 == 0x1 +252 05 -12 +253 $error: 0 +254 +255 :(scenario negative_number_too_small) +256 % Hide_errors = true; +257 == 0x1 +258 05 -12345 +259 +error: token '-12345' is not a hex byte +260 +261 :(scenario hex_prefix) +262 == 0x1 +263 0x05 -0x12 +264 $error: 0 +265 +266 //: helper for tests +267 :(code) +268 void parse_and_load(const string& text_bytes) { +269 program p; +270 istringstream in(text_bytes); +271 parse(in, p); +272 if (trace_contains_errors()) return; // if any stage raises errors, stop immediately +273 load(p); +274 } +275 +276 //:: run +277 +278 :(before "End Initialize Op Names(name)") +279 put(name, "05", "add imm32 to R0 (EAX)"); +280 +281 //: our first opcode +282 :(before "End Single-Byte Opcodes") +283 case 0x05: { // add imm32 to EAX +284 int32_t arg2 = imm32(); +285 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); +286 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); +287 break; +288 } +289 +290 :(code) +291 // read a 32-bit immediate in little-endian order from the instruction stream +292 int32_t imm32() { +293 int32_t result = next(); +294 result |= (next()<<8); +295 result |= (next()<<16); +296 result |= (next()<<24); +297 return result; +298 } diff --git a/html/subx/012elf.cc.html b/html/subx/012elf.cc.html index 554ebed9..cb4b255d 100644 --- a/html/subx/012elf.cc.html +++ b/html/subx/012elf.cc.html @@ -63,29 +63,29 @@ if ('onhashchange' in window) { 4 :(before "End Main") 5 assert(argc > 1); 6 if (is_equal(argv[1], "run")) { - 7 START_TRACING_UNTIL_END_OF_SCOPE; + 7 START_TRACING_UNTIL_END_OF_SCOPE; 8 assert(argc > 2); 9 
reset(); 10 cerr << std::hex; 11 initialize_mem(); - 12 Mem_offset = CODE_START; + 12 Mem_offset = CODE_START; 13 load_elf(argv[2]); - 14 while (EIP < End_of_program) // weak final-gasp termination check - 15 run_one_instruction(); - 16 dbg << "executed past end of the world: " << EIP << " vs " << End_of_program << end(); + 14 while (EIP < End_of_program) // weak final-gasp termination check + 15 run_one_instruction(); + 16 trace(90, "load") << "executed past end of the world: " << EIP << " vs " << End_of_program << end(); 17 return 0; 18 } 19 20 :(code) 21 void load_elf(const string& filename) { 22 int fd = open(filename.c_str(), O_RDONLY); - 23 if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die(); + 23 if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die(); 24 off_t size = lseek(fd, 0, SEEK_END); 25 lseek(fd, 0, SEEK_SET); 26 uint8_t* elf_contents = static_cast<uint8_t*>(malloc(size)); - 27 if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die(); + 27 if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die(); 28 ssize_t read_size = read(fd, elf_contents, size); - 29 if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die(); + 29 if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die(); 30 load_elf_contents(elf_contents, size); 31 free(elf_contents); 32 } @@ -94,25 +94,25 @@ if ('onhashchange' in window) { 35 uint8_t magic[5] = {0}; 36 memcpy(magic, elf_contents, 4); 37 if (memcmp(magic, "\177ELF", 4) != 0) - 38 raise << "Invalid ELF file; starts with \"" << magic << '"' << die(); + 38 raise << "Invalid ELF file; starts with \"" << magic << '"' << die(); 39 if (elf_contents[4] != 1) - 40 raise << "Only 32-bit ELF files (4-byte words; virtual addresses up to 4GB) supported.\n" << die(); + 40 raise << "Only 32-bit ELF files (4-byte words; virtual addresses up to 
4GB) supported.\n" << die(); 41 if (elf_contents[5] != 1) - 42 raise << "Only little-endian ELF files supported.\n" << die(); + 42 raise << "Only little-endian ELF files supported.\n" << die(); 43 // unused: remaining 10 bytes of e_ident 44 uint32_t e_machine_type = u32_in(&elf_contents[16]); 45 if (e_machine_type != 0x00030002) - 46 raise << "ELF type/machine 0x" << HEXWORD << e_machine_type << " isn't i386 executable\n" << die(); + 46 raise << "ELF type/machine 0x" << HEXWORD << e_machine_type << " isn't i386 executable\n" << die(); 47 // unused: e_version. We only support version 1, and later versions will be backwards compatible. 48 uint32_t e_entry = u32_in(&elf_contents[24]); 49 uint32_t e_phoff = u32_in(&elf_contents[28]); 50 // unused: e_shoff 51 // unused: e_flags 52 uint32_t e_ehsize = u16_in(&elf_contents[40]); - 53 if (e_ehsize < 52) raise << "Invalid binary; ELF header too small\n" << die(); + 53 if (e_ehsize < 52) raise << "Invalid binary; ELF header too small\n" << die(); 54 uint32_t e_phentsize = u16_in(&elf_contents[42]); 55 uint32_t e_phnum = u16_in(&elf_contents[44]); - 56 dbg << e_phnum << " entries in the program header, each " << e_phentsize << " bytes long" << end(); + 56 trace(90, "load") << e_phnum << " entries in the program header, each " << e_phentsize << " bytes long" << end(); 57 // unused: e_shentsize 58 // unused: e_shnum 59 // unused: e_shstrndx @@ -123,35 +123,35 @@ if ('onhashchange' in window) { 64 // initialize code and stack 65 Reg[ESP].u = AFTER_STACK; 66 Reg[EBP].u = 0; - 67 EIP = e_entry; + 67 EIP = e_entry; 68 } 69 70 void load_segment_from_program_header(uint8_t* elf_contents, size_t size, uint32_t offset, uint32_t e_ehsize) { 71 uint32_t p_type = u32_in(&elf_contents[offset]); - 72 dbg << "program header at offset " << offset << ": type " << p_type << end(); + 72 trace(90, "load") << "program header at offset " << offset << ": type " << p_type << end(); 73 if (p_type != 1) { - 74 dbg << "ignoring segment at offset " << 
offset << " of non PT_LOAD type " << p_type << " (see http://refspecs.linuxbase.org/elf/elf.pdf)" << end(); + 74 trace(90, "load") << "ignoring segment at offset " << offset << " of non PT_LOAD type " << p_type << " (see http://refspecs.linuxbase.org/elf/elf.pdf)" << end(); 75 return; 76 } 77 uint32_t p_offset = u32_in(&elf_contents[offset + 4]); 78 uint32_t p_vaddr = u32_in(&elf_contents[offset + 8]); - 79 if (e_ehsize > p_vaddr) raise << "Invalid binary; program header overlaps ELF header\n" << die(); + 79 if (e_ehsize > p_vaddr) raise << "Invalid binary; program header overlaps ELF header\n" << die(); 80 // unused: p_paddr 81 uint32_t p_filesz = u32_in(&elf_contents[offset + 16]); 82 uint32_t p_memsz = u32_in(&elf_contents[offset + 20]); 83 if (p_filesz != p_memsz) - 84 raise << "Can't handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die(); + 84 raise << "Can't handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die(); 85 86 if (p_offset + p_filesz > size) - 87 raise << "Invalid binary; segment at offset " << offset << " is too large: wants to end at " << p_offset+p_filesz << " but the file ends at " << size << '\n' << die(); - 88 if (Mem.size() < p_vaddr + p_memsz) - 89 Mem.resize(p_vaddr + p_memsz); + 87 raise << "Invalid binary; segment at offset " << offset << " is too large: wants to end at " << p_offset+p_filesz << " but the file ends at " << size << '\n' << die(); + 88 if (Mem.size() < p_vaddr + p_memsz) + 89 Mem.resize(p_vaddr + p_memsz); 90 if (size > p_memsz) size = p_memsz; - 91 dbg << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ')' << end(); + 91 trace(90, "load") << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ')' << end(); 92 for (size_t i = 0; i < p_filesz; ++i) - 93 
write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]); - 94 if (End_of_program < p_vaddr+p_memsz) - 95 End_of_program = p_vaddr+p_memsz; + 93 write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]); + 94 if (End_of_program < p_vaddr+p_memsz) + 95 End_of_program = p_vaddr+p_memsz; 96 } 97 98 :(before "End Includes") @@ -165,7 +165,7 @@ if ('onhashchange' in window) { 106 const int AFTER_STACK = 0x0804c000; 107 :(code) 108 void initialize_mem() { -109 Mem.resize(AFTER_STACK - CODE_START); +109 Mem.resize(AFTER_STACK - CODE_START); 110 } 111 112 inline uint32_t u32_in(uint8_t* p) { @@ -189,7 +189,7 @@ if ('onhashchange' in window) { 130 struct die {}; 131 :(code) 132 ostream& operator<<(ostream& /*unused*/, die /*unused*/) { -133 if (Trace_stream) Trace_stream->newline(); +133 if (Trace_stream) Trace_stream->newline(); 134 exit(1); 135 } 136 diff --git a/html/subx/013direct_addressing.cc.html b/html/subx/013direct_addressing.cc.html index b7c416dd..69e32bc5 100644 --- a/html/subx/013direct_addressing.cc.html +++ b/html/subx/013direct_addressing.cc.html @@ -66,7 +66,7 @@ if ('onhashchange' in window) { 1 //: operating directly on a register 2 3 :(before "End Initialize Op Names(name)") - 4 put(name, "01", "add r32 to rm32"); + 4 put(name, "01", "add r32 to rm32"); 5 6 :(scenario add_r32_to_r32) 7 % Reg[EAX].i = 0x10; @@ -75,17 +75,17 @@ if ('onhashchange' in window) { 10 # op ModR/M SIB displacement immediate 11 01 d8 # add EBX to EAX 12 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) - 13 +run: add EBX to r/m32 - 14 +run: r/m32 is EAX + 13 +run: add EBX to r/m32 + 14 +run: r/m32 is EAX 15 +run: storing 0x00000011 16 17 :(before "End Single-Byte Opcodes") 18 case 0x01: { // add r32 to r/m32 - 19 uint8_t modrm = next(); + 19 uint8_t modrm = next(); 20 uint8_t arg2 = (modrm>>3)&0x7; - 21 trace(90, "run") << "add " << rname(arg2) << " to r/m32" << end(); + 21 trace(90, "run") << "add " << rname(arg2) << " to r/m32" << end(); 22 int32_t* arg1 = 
effective_address(modrm); - 23 BINARY_ARITHMETIC_OP(+, *arg1, Reg[arg2].i); + 23 BINARY_ARITHMETIC_OP(+, *arg1, Reg[arg2].i); 24 break; 25 } 26 @@ -101,15 +101,15 @@ if ('onhashchange' in window) { 36 switch (mod) { 37 case 3: 38 // mod 3 is just register direct addressing - 39 trace(90, "run") << "r/m32 is " << rname(rm) << end(); + 39 trace(90, "run") << "r/m32 is " << rname(rm) << end(); 40 return &Reg[rm].i; 41 // End Mod Special-cases(addr) 42 default: - 43 cerr << "unrecognized mod bits: " << NUM(mod) << '\n'; + 43 cerr << "unrecognized mod bits: " << NUM(mod) << '\n'; 44 exit(1); 45 } 46 //: other mods are indirect, and they'll set addr appropriately - 47 return mem_addr_i32(addr); + 47 return mem_addr_i32(addr); 48 } 49 50 string rname(uint8_t r) { @@ -122,14 +122,14 @@ if ('onhashchange' in window) { 57 case 5: return "EBP"; 58 case 6: return "ESI"; 59 case 7: return "EDI"; - 60 default: raise << "invalid register " << r << '\n' << end(); return ""; + 60 default: raise << "invalid register " << r << '\n' << end(); return ""; 61 } 62 } 63 64 //:: subtract 65 66 :(before "End Initialize Op Names(name)") - 67 put(name, "29", "subtract r32 from rm32"); + 67 put(name, "29", "subtract r32 from rm32"); 68 69 :(scenario subtract_r32_from_r32) 70 % Reg[EAX].i = 10; @@ -138,24 +138,24 @@ if ('onhashchange' in window) { 73 # op ModR/M SIB displacement immediate 74 29 d8 # subtract EBX from EAX 75 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) - 76 +run: subtract EBX from r/m32 - 77 +run: r/m32 is EAX + 76 +run: subtract EBX from r/m32 + 77 +run: r/m32 is EAX 78 +run: storing 0x00000009 79 80 :(before "End Single-Byte Opcodes") 81 case 0x29: { // subtract r32 from r/m32 - 82 uint8_t modrm = next(); + 82 uint8_t modrm = next(); 83 uint8_t arg2 = (modrm>>3)&0x7; - 84 trace(90, "run") << "subtract " << rname(arg2) << " from r/m32" << end(); + 84 trace(90, "run") << "subtract " << rname(arg2) << " from r/m32" << end(); 85 int32_t* arg1 = 
effective_address(modrm); - 86 BINARY_ARITHMETIC_OP(-, *arg1, Reg[arg2].i); + 86 BINARY_ARITHMETIC_OP(-, *arg1, Reg[arg2].i); 87 break; 88 } 89 90 //:: multiply 91 92 :(before "End Initialize Op Names(name)") - 93 put(name_0f, "af", "multiply rm32 into r32"); + 93 put(name_0f, "af", "multiply rm32 into r32"); 94 95 :(scenario multiply_r32_into_r32) 96 % Reg[EAX].i = 4; @@ -164,24 +164,24 @@ if ('onhashchange' in window) { 99 # op ModR/M SIB displacement immediate 100 0f af d8 # subtract EBX into EAX 101 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -102 +run: multiply r/m32 into EBX -103 +run: r/m32 is EAX +102 +run: multiply r/m32 into EBX +103 +run: r/m32 is EAX 104 +run: storing 0x00000008 105 106 :(before "End Two-Byte Opcodes Starting With 0f") 107 case 0xaf: { // multiply r32 into r/m32 -108 uint8_t modrm = next(); +108 uint8_t modrm = next(); 109 uint8_t arg2 = (modrm>>3)&0x7; -110 trace(90, "run") << "multiply r/m32 into " << rname(arg2) << end(); +110 trace(90, "run") << "multiply r/m32 into " << rname(arg2) << end(); 111 int32_t* arg1 = effective_address(modrm); -112 BINARY_ARITHMETIC_OP(*, Reg[arg2].i, *arg1); +112 BINARY_ARITHMETIC_OP(*, Reg[arg2].i, *arg1); 113 break; 114 } 115 116 //:: and 117 118 :(before "End Initialize Op Names(name)") -119 put(name, "21", "rm32 = bitwise AND of r32 with rm32"); +119 put(name, "21", "rm32 = bitwise AND of r32 with rm32"); 120 121 :(scenario and_r32_with_r32) 122 % Reg[EAX].i = 0x0a0b0c0d; @@ -190,24 +190,24 @@ if ('onhashchange' in window) { 125 # op ModR/M SIB displacement immediate 126 21 d8 # and EBX with destination EAX 127 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -128 +run: and EBX with r/m32 -129 +run: r/m32 is EAX +128 +run: and EBX with r/m32 +129 +run: r/m32 is EAX 130 +run: storing 0x0000000d 131 132 :(before "End Single-Byte Opcodes") 133 case 0x21: { // and r32 with r/m32 -134 uint8_t modrm = next(); +134 uint8_t modrm = next(); 135 uint8_t arg2 = 
(modrm>>3)&0x7; -136 trace(90, "run") << "and " << rname(arg2) << " with r/m32" << end(); +136 trace(90, "run") << "and " << rname(arg2) << " with r/m32" << end(); 137 int32_t* arg1 = effective_address(modrm); -138 BINARY_BITWISE_OP(&, *arg1, Reg[arg2].u); +138 BINARY_BITWISE_OP(&, *arg1, Reg[arg2].u); 139 break; 140 } 141 142 //:: or 143 144 :(before "End Initialize Op Names(name)") -145 put(name, "09", "rm32 = bitwise OR of r32 with rm32"); +145 put(name, "09", "rm32 = bitwise OR of r32 with rm32"); 146 147 :(scenario or_r32_with_r32) 148 % Reg[EAX].i = 0x0a0b0c0d; @@ -216,24 +216,24 @@ if ('onhashchange' in window) { 151 # op ModR/M SIB displacement immediate 152 09 d8 # or EBX with destination EAX 153 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -154 +run: or EBX with r/m32 -155 +run: r/m32 is EAX +154 +run: or EBX with r/m32 +155 +run: r/m32 is EAX 156 +run: storing 0xaabbccdd 157 158 :(before "End Single-Byte Opcodes") 159 case 0x09: { // or r32 with r/m32 -160 uint8_t modrm = next(); +160 uint8_t modrm = next(); 161 uint8_t arg2 = (modrm>>3)&0x7; -162 trace(90, "run") << "or " << rname(arg2) << " with r/m32" << end(); +162 trace(90, "run") << "or " << rname(arg2) << " with r/m32" << end(); 163 int32_t* arg1 = effective_address(modrm); -164 BINARY_BITWISE_OP(|, *arg1, Reg[arg2].u); +164 BINARY_BITWISE_OP(|, *arg1, Reg[arg2].u); 165 break; 166 } 167 168 //:: xor 169 170 :(before "End Initialize Op Names(name)") -171 put(name, "31", "rm32 = bitwise XOR of r32 with rm32"); +171 put(name, "31", "rm32 = bitwise XOR of r32 with rm32"); 172 173 :(scenario xor_r32_with_r32) 174 % Reg[EAX].i = 0x0a0b0c0d; @@ -242,24 +242,24 @@ if ('onhashchange' in window) { 177 # op ModR/M SIB displacement immediate 178 31 d8 # xor EBX with destination EAX 179 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -180 +run: xor EBX with r/m32 -181 +run: r/m32 is EAX +180 +run: xor EBX with r/m32 +181 +run: r/m32 is EAX 182 +run: storing 0xa0b0ccdd 183 
184 :(before "End Single-Byte Opcodes") 185 case 0x31: { // xor r32 with r/m32 -186 uint8_t modrm = next(); +186 uint8_t modrm = next(); 187 uint8_t arg2 = (modrm>>3)&0x7; -188 trace(90, "run") << "xor " << rname(arg2) << " with r/m32" << end(); +188 trace(90, "run") << "xor " << rname(arg2) << " with r/m32" << end(); 189 int32_t* arg1 = effective_address(modrm); -190 BINARY_BITWISE_OP(^, *arg1, Reg[arg2].u); +190 BINARY_BITWISE_OP(^, *arg1, Reg[arg2].u); 191 break; 192 } 193 194 //:: not 195 196 :(before "End Initialize Op Names(name)") -197 put(name, "f7", "bitwise complement of rm32"); +197 put(name, "f7", "bitwise complement of rm32"); 198 199 :(scenario not_r32) 200 % Reg[EBX].i = 0x0f0f00ff; @@ -268,26 +268,26 @@ if ('onhashchange' in window) { 203 f7 c3 # not EBX 204 # ModR/M in binary: 11 (direct mode) 000 (unused) 011 (dest EBX) 205 +run: 'not' of r/m32 -206 +run: r/m32 is EBX +206 +run: r/m32 is EBX 207 +run: storing 0xf0f0ff00 208 209 :(before "End Single-Byte Opcodes") 210 case 0xf7: { // xor r32 with r/m32 -211 uint8_t modrm = next(); -212 trace(90, "run") << "'not' of r/m32" << end(); +211 uint8_t modrm = next(); +212 trace(90, "run") << "'not' of r/m32" << end(); 213 int32_t* arg1 = effective_address(modrm); 214 *arg1 = ~(*arg1); -215 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); +215 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); 216 SF = (*arg1 >> 31); 217 ZF = (*arg1 == 0); -218 OF = false; +218 OF = false; 219 break; 220 } 221 222 //:: compare (cmp) 223 224 :(before "End Initialize Op Names(name)") -225 put(name, "39", "set SF if rm32 < r32"); +225 put(name, "39", "set SF if rm32 < r32"); 226 227 :(scenario compare_r32_with_r32_greater) 228 % Reg[EAX].i = 0x0a0b0c0d; @@ -296,23 +296,23 @@ if ('onhashchange' in window) { 231 # op ModR/M SIB displacement immediate 232 39 d8 # compare EBX with EAX 233 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -234 +run: compare EBX with r/m32 -235 +run: r/m32 
is EAX +234 +run: compare EBX with r/m32 +235 +run: r/m32 is EAX 236 +run: SF=0; ZF=0; OF=0 237 238 :(before "End Single-Byte Opcodes") 239 case 0x39: { // set SF if r/m32 < r32 -240 uint8_t modrm = next(); +240 uint8_t modrm = next(); 241 uint8_t reg2 = (modrm>>3)&0x7; -242 trace(90, "run") << "compare " << rname(reg2) << " with r/m32" << end(); +242 trace(90, "run") << "compare " << rname(reg2) << " with r/m32" << end(); 243 int32_t* arg1 = effective_address(modrm); 244 int32_t arg2 = Reg[reg2].i; 245 int32_t tmp1 = *arg1 - arg2; 246 SF = (tmp1 < 0); 247 ZF = (tmp1 == 0); 248 int64_t tmp2 = *arg1 - arg2; -249 OF = (tmp1 != tmp2); -250 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); +249 OF = (tmp1 != tmp2); +250 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); 251 break; 252 } 253 @@ -323,8 +323,8 @@ if ('onhashchange' in window) { 258 # op ModR/M SIB displacement immediate 259 39 d8 # compare EBX with EAX 260 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -261 +run: compare EBX with r/m32 -262 +run: r/m32 is EAX +261 +run: compare EBX with r/m32 +262 +run: r/m32 is EAX 263 +run: SF=1; ZF=0; OF=0 264 265 :(scenario compare_r32_with_r32_equal) @@ -334,14 +334,14 @@ if ('onhashchange' in window) { 269 # op ModR/M SIB displacement immediate 270 39 d8 # compare EBX with EAX 271 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -272 +run: compare EBX with r/m32 -273 +run: r/m32 is EAX +272 +run: compare EBX with r/m32 +273 +run: r/m32 is EAX 274 +run: SF=0; ZF=1; OF=0 275 276 //:: copy (mov) 277 278 :(before "End Initialize Op Names(name)") -279 put(name, "89", "copy r32 to rm32"); +279 put(name, "89", "copy r32 to rm32"); 280 281 :(scenario copy_r32_to_r32) 282 % Reg[EBX].i = 0xaf; @@ -349,25 +349,25 @@ if ('onhashchange' in window) { 284 # op ModR/M SIB displacement immediate 285 89 d8 # copy EBX to EAX 286 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -287 
+run: copy EBX to r/m32 -288 +run: r/m32 is EAX +287 +run: copy EBX to r/m32 +288 +run: r/m32 is EAX 289 +run: storing 0x000000af 290 291 :(before "End Single-Byte Opcodes") 292 case 0x89: { // copy r32 to r/m32 -293 uint8_t modrm = next(); +293 uint8_t modrm = next(); 294 uint8_t reg2 = (modrm>>3)&0x7; -295 trace(90, "run") << "copy " << rname(reg2) << " to r/m32" << end(); +295 trace(90, "run") << "copy " << rname(reg2) << " to r/m32" << end(); 296 int32_t* arg1 = effective_address(modrm); 297 *arg1 = Reg[reg2].i; -298 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); +298 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); 299 break; 300 } 301 302 //:: xchg 303 304 :(before "End Initialize Op Names(name)") -305 put(name, "87", "swap the contents of r32 and rm32"); +305 put(name, "87", "swap the contents of r32 and rm32"); 306 307 :(scenario xchg_r32_with_r32) 308 % Reg[EBX].i = 0xaf; @@ -376,36 +376,36 @@ if ('onhashchange' in window) { 311 # op ModR/M SIB displacement immediate 312 87 d8 # exchange EBX with EAX 313 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -314 +run: exchange EBX with r/m32 -315 +run: r/m32 is EAX +314 +run: exchange EBX with r/m32 +315 +run: r/m32 is EAX 316 +run: storing 0x000000af in r/m32 -317 +run: storing 0x0000002e in EBX +317 +run: storing 0x0000002e in EBX 318 319 :(before "End Single-Byte Opcodes") 320 case 0x87: { // exchange r32 with r/m32 -321 uint8_t modrm = next(); +321 uint8_t modrm = next(); 322 uint8_t reg2 = (modrm>>3)&0x7; -323 trace(90, "run") << "exchange " << rname(reg2) << " with r/m32" << end(); +323 trace(90, "run") << "exchange " << rname(reg2) << " with r/m32" << end(); 324 int32_t* arg1 = effective_address(modrm); 325 int32_t tmp = *arg1; 326 *arg1 = Reg[reg2].i; 327 Reg[reg2].i = tmp; -328 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << " in r/m32" << end(); -329 trace(90, "run") << "storing 0x" << HEXWORD << Reg[reg2].i << " in " << rname(reg2) << end(); +328 
trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << " in r/m32" << end(); +329 trace(90, "run") << "storing 0x" << HEXWORD << Reg[reg2].i << " in " << rname(reg2) << end(); 330 break; 331 } 332 333 //:: push 334 335 :(before "End Initialize Op Names(name)") -336 put(name, "50", "push R0 (EAX) to stack"); -337 put(name, "51", "push R1 (ECX) to stack"); -338 put(name, "52", "push R2 (EDX) to stack"); -339 put(name, "53", "push R3 (EBX) to stack"); -340 put(name, "54", "push R4 (ESP) to stack"); -341 put(name, "55", "push R5 (EBP) to stack"); -342 put(name, "56", "push R6 (ESI) to stack"); -343 put(name, "57", "push R7 (EDI) to stack"); +336 put(name, "50", "push R0 (EAX) to stack"); +337 put(name, "51", "push R1 (ECX) to stack"); +338 put(name, "52", "push R2 (EDX) to stack"); +339 put(name, "53", "push R3 (EBX) to stack"); +340 put(name, "54", "push R4 (ESP) to stack"); +341 put(name, "55", "push R5 (EBP) to stack"); +342 put(name, "56", "push R6 (ESI) to stack"); +343 put(name, "57", "push R7 (EDI) to stack"); 344 345 :(scenario push_r32) 346 % Reg[ESP].u = 0x64; @@ -413,8 +413,8 @@ if ('onhashchange' in window) { 348 == 0x1 349 # op ModR/M SIB displacement immediate 350 53 # push EBX to stack -351 +run: push EBX -352 +run: decrementing ESP to 0x00000060 +351 +run: push EBX +352 +run: decrementing ESP to 0x00000060 353 +run: pushing value 0x0000000a 354 355 :(before "End Single-Byte Opcodes") @@ -426,8 +426,8 @@ if ('onhashchange' in window) { 361 case 0x55: 362 case 0x56: 363 case 0x57: { // push r32 to stack -364 uint8_t reg = op & 0x7; -365 trace(90, "run") << "push " << rname(reg) << end(); +364 uint8_t reg = op & 0x7; +365 trace(90, "run") << "push " << rname(reg) << end(); 366 //? 
cerr << "push: " << NUM(reg) << ": " << Reg[reg].u << " => " << Reg[ESP].u << '\n'; 367 push(Reg[reg].u); 368 break; @@ -435,22 +435,22 @@ if ('onhashchange' in window) { 370 :(code) 371 void push(uint32_t val) { 372 Reg[ESP].u -= 4; -373 trace(90, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); -374 trace(90, "run") << "pushing value 0x" << HEXWORD << val << end(); -375 write_mem_u32(Reg[ESP].u, val); +373 trace(90, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); +374 trace(90, "run") << "pushing value 0x" << HEXWORD << val << end(); +375 write_mem_u32(Reg[ESP].u, val); 376 } 377 378 //:: pop 379 380 :(before "End Initialize Op Names(name)") -381 put(name, "58", "pop top of stack to R0 (EAX)"); -382 put(name, "59", "pop top of stack to R1 (ECX)"); -383 put(name, "5a", "pop top of stack to R2 (EDX)"); -384 put(name, "5b", "pop top of stack to R3 (EBX)"); -385 put(name, "5c", "pop top of stack to R4 (ESP)"); -386 put(name, "5d", "pop top of stack to R5 (EBP)"); -387 put(name, "5e", "pop top of stack to R6 (ESI)"); -388 put(name, "5f", "pop top of stack to R7 (EDI)"); +381 put(name, "58", "pop top of stack to R0 (EAX)"); +382 put(name, "59", "pop top of stack to R1 (ECX)"); +383 put(name, "5a", "pop top of stack to R2 (EDX)"); +384 put(name, "5b", "pop top of stack to R3 (EBX)"); +385 put(name, "5c", "pop top of stack to R4 (ESP)"); +386 put(name, "5d", "pop top of stack to R5 (EBP)"); +387 put(name, "5e", "pop top of stack to R6 (ESI)"); +388 put(name, "5f", "pop top of stack to R7 (EDI)"); 389 390 :(scenario pop_r32) 391 % Reg[ESP].u = 0x60; @@ -460,9 +460,9 @@ if ('onhashchange' in window) { 395 5b # pop stack to EBX 396 == 0x60 # data segment 397 0a 00 00 00 # 0x0a -398 +run: pop into EBX +398 +run: pop into EBX 399 +run: popping value 0x0000000a -400 +run: incrementing ESP to 0x00000064 +400 +run: incrementing ESP to 0x00000064 401 402 :(before "End Single-Byte Opcodes") 403 case 0x58: @@ -473,8 +473,8 @@ if 
('onhashchange' in window) { 408 case 0x5d: 409 case 0x5e: 410 case 0x5f: { // pop stack into r32 -411 uint8_t reg = op & 0x7; -412 trace(90, "run") << "pop into " << rname(reg) << end(); +411 uint8_t reg = op & 0x7; +412 trace(90, "run") << "pop into " << rname(reg) << end(); 413 //? cerr << "pop from " << Reg[ESP].u << '\n'; 414 Reg[reg].u = pop(); 415 //? cerr << "=> " << NUM(reg) << ": " << Reg[reg].u << '\n'; @@ -482,10 +482,10 @@ if ('onhashchange' in window) { 417 } 418 :(code) 419 uint32_t pop() { -420 uint32_t result = read_mem_u32(Reg[ESP].u); -421 trace(90, "run") << "popping value 0x" << HEXWORD << result << end(); +420 uint32_t result = read_mem_u32(Reg[ESP].u); +421 trace(90, "run") << "popping value 0x" << HEXWORD << result << end(); 422 Reg[ESP].u += 4; -423 trace(90, "run") << "incrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); +423 trace(90, "run") << "incrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); 424 return result; 425 } diff --git a/html/subx/014indirect_addressing.cc.html b/html/subx/014indirect_addressing.cc.html index 458e0e78..43b9e5be 100644 --- a/html/subx/014indirect_addressing.cc.html +++ b/html/subx/014indirect_addressing.cc.html @@ -74,7 +74,7 @@ if ('onhashchange' in window) { 10 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 11 == 0x60 # data segment 12 01 00 00 00 # 1 - 13 +run: add EBX to r/m32 + 13 +run: add EBX to r/m32 14 +run: effective address is 0x60 (EAX) 15 +run: storing 0x00000011 16 @@ -82,7 +82,7 @@ if ('onhashchange' in window) { 18 case 0: // indirect addressing 19 switch (rm) { 20 default: // address in register - 21 trace(90, "run") << "effective address is 0x" << std::hex << Reg[rm].u << " (" << rname(rm) << ")" << end(); + 21 trace(90, "run") << "effective address is 0x" << std::hex << Reg[rm].u << " (" << rname(rm) << ")" << end(); 22 addr = Reg[rm].u; 23 break; 24 // End Mod 0 Special-cases(addr) @@ -92,7 +92,7 @@ if ('onhashchange' in window) { 28 //: 29 30 :(before "End 
Initialize Op Names(name)") - 31 put(name, "03", "add rm32 to r32"); + 31 put(name, "03", "add rm32 to r32"); 32 33 :(scenario add_mem_at_r32_to_r32) 34 % Reg[EAX].i = 0x60; @@ -103,17 +103,17 @@ if ('onhashchange' in window) { 39 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 40 == 0x60 # data segment 41 01 00 00 00 # 1 - 42 +run: add r/m32 to EBX + 42 +run: add r/m32 to EBX 43 +run: effective address is 0x60 (EAX) 44 +run: storing 0x00000011 45 46 :(before "End Single-Byte Opcodes") 47 case 0x03: { // add r/m32 to r32 - 48 uint8_t modrm = next(); + 48 uint8_t modrm = next(); 49 uint8_t arg1 = (modrm>>3)&0x7; - 50 trace(90, "run") << "add r/m32 to " << rname(arg1) << end(); + 50 trace(90, "run") << "add r/m32 to " << rname(arg1) << end(); 51 const int32_t* arg2 = effective_address(modrm); - 52 BINARY_ARITHMETIC_OP(+, Reg[arg1].i, *arg2); + 52 BINARY_ARITHMETIC_OP(+, Reg[arg1].i, *arg2); 53 break; 54 } 55 @@ -128,14 +128,14 @@ if ('onhashchange' in window) { 64 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 65 == 0x60 # data segment 66 0a 00 00 00 # 10 - 67 +run: subtract EBX from r/m32 + 67 +run: subtract EBX from r/m32 68 +run: effective address is 0x60 (EAX) 69 +run: storing 0x00000009 70 71 //: 72 73 :(before "End Initialize Op Names(name)") - 74 put(name, "2b", "subtract rm32 from r32"); + 74 put(name, "2b", "subtract rm32 from r32"); 75 76 :(scenario subtract_mem_at_r32_from_r32) 77 % Reg[EAX].i = 0x60; @@ -146,17 +146,17 @@ if ('onhashchange' in window) { 82 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 83 == 0x60 # data segment 84 01 00 00 00 # 1 - 85 +run: subtract r/m32 from EBX + 85 +run: subtract r/m32 from EBX 86 +run: effective address is 0x60 (EAX) 87 +run: storing 0x00000009 88 89 :(before "End Single-Byte Opcodes") 90 case 0x2b: { // subtract r/m32 from r32 - 91 uint8_t modrm = next(); + 91 uint8_t modrm = next(); 92 uint8_t arg1 = (modrm>>3)&0x7; - 93 trace(90, "run") << "subtract 
r/m32 from " << rname(arg1) << end(); + 93 trace(90, "run") << "subtract r/m32 from " << rname(arg1) << end(); 94 const int32_t* arg2 = effective_address(modrm); - 95 BINARY_ARITHMETIC_OP(-, Reg[arg1].i, *arg2); + 95 BINARY_ARITHMETIC_OP(-, Reg[arg1].i, *arg2); 96 break; 97 } 98 @@ -171,14 +171,14 @@ if ('onhashchange' in window) { 107 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 108 == 0x60 # data segment 109 0d 0c 0b 0a # 0x0a0b0c0d -110 +run: and EBX with r/m32 +110 +run: and EBX with r/m32 111 +run: effective address is 0x60 (EAX) 112 +run: storing 0x0000000d 113 114 //: 115 116 :(before "End Initialize Op Names(name)") -117 put(name, "23", "r32 = bitwise AND of r32 with rm32"); +117 put(name, "23", "r32 = bitwise AND of r32 with rm32"); 118 119 :(scenario and_mem_at_r32_with_r32) 120 % Reg[EAX].i = 0x60; @@ -189,17 +189,17 @@ if ('onhashchange' in window) { 125 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 126 == 0x60 # data segment 127 ff 00 00 00 # 0xff -128 +run: and r/m32 with EBX +128 +run: and r/m32 with EBX 129 +run: effective address is 0x60 (EAX) 130 +run: storing 0x0000000d 131 132 :(before "End Single-Byte Opcodes") 133 case 0x23: { // and r/m32 with r32 -134 uint8_t modrm = next(); +134 uint8_t modrm = next(); 135 uint8_t arg1 = (modrm>>3)&0x7; -136 trace(90, "run") << "and r/m32 with " << rname(arg1) << end(); +136 trace(90, "run") << "and r/m32 with " << rname(arg1) << end(); 137 const int32_t* arg2 = effective_address(modrm); -138 BINARY_BITWISE_OP(&, Reg[arg1].u, *arg2); +138 BINARY_BITWISE_OP(&, Reg[arg1].u, *arg2); 139 break; 140 } 141 @@ -214,14 +214,14 @@ if ('onhashchange' in window) { 150 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 151 == 0x60 # data segment 152 0d 0c 0b 0a # 0x0a0b0c0d -153 +run: or EBX with r/m32 +153 +run: or EBX with r/m32 154 +run: effective address is 0x60 (EAX) 155 +run: storing 0xaabbccdd 156 157 //: 158 159 :(before "End Initialize Op 
Names(name)") -160 put(name, "0b", "r32 = bitwise OR of r32 with rm32"); +160 put(name, "0b", "r32 = bitwise OR of r32 with rm32"); 161 162 :(scenario or_mem_at_r32_with_r32) 163 % Reg[EAX].i = 0x60; @@ -232,17 +232,17 @@ if ('onhashchange' in window) { 168 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 169 == 0x60 # data segment 170 0d 0c 0b 0a # 0x0a0b0c0d -171 +run: or r/m32 with EBX +171 +run: or r/m32 with EBX 172 +run: effective address is 0x60 (EAX) 173 +run: storing 0xaabbccdd 174 175 :(before "End Single-Byte Opcodes") 176 case 0x0b: { // or r/m32 with r32 -177 uint8_t modrm = next(); +177 uint8_t modrm = next(); 178 uint8_t arg1 = (modrm>>3)&0x7; -179 trace(90, "run") << "or r/m32 with " << rname(arg1) << end(); +179 trace(90, "run") << "or r/m32 with " << rname(arg1) << end(); 180 const int32_t* arg2 = effective_address(modrm); -181 BINARY_BITWISE_OP(|, Reg[arg1].u, *arg2); +181 BINARY_BITWISE_OP(|, Reg[arg1].u, *arg2); 182 break; 183 } 184 @@ -257,14 +257,14 @@ if ('onhashchange' in window) { 193 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 194 == 0x60 # data segment 195 0d 0c bb aa # 0xaabb0c0d -196 +run: xor EBX with r/m32 +196 +run: xor EBX with r/m32 197 +run: effective address is 0x60 (EAX) 198 +run: storing 0x0a0bccdd 199 200 //: 201 202 :(before "End Initialize Op Names(name)") -203 put(name, "33", "r32 = bitwise XOR of r32 with rm32"); +203 put(name, "33", "r32 = bitwise XOR of r32 with rm32"); 204 205 :(scenario xor_mem_at_r32_with_r32) 206 % Reg[EAX].i = 0x60; @@ -275,17 +275,17 @@ if ('onhashchange' in window) { 211 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 212 == 0x60 # data segment 213 0d 0c 0b 0a # 0x0a0b0c0d -214 +run: xor r/m32 with EBX +214 +run: xor r/m32 with EBX 215 +run: effective address is 0x60 (EAX) 216 +run: storing 0xaabbccdd 217 218 :(before "End Single-Byte Opcodes") 219 case 0x33: { // xor r/m32 with r32 -220 uint8_t modrm = next(); +220 uint8_t modrm = 
next(); 221 uint8_t arg1 = (modrm>>3)&0x7; -222 trace(90, "run") << "xor r/m32 with " << rname(arg1) << end(); +222 trace(90, "run") << "xor r/m32 with " << rname(arg1) << end(); 223 const int32_t* arg2 = effective_address(modrm); -224 BINARY_BITWISE_OP(|, Reg[arg1].u, *arg2); +224 BINARY_BITWISE_OP(|, Reg[arg1].u, *arg2); 225 break; 226 } 227 @@ -314,7 +314,7 @@ if ('onhashchange' in window) { 250 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 251 == 0x60 # data segment 252 0d 0c 0b 0a # 0x0a0b0c0d -253 +run: compare EBX with r/m32 +253 +run: compare EBX with r/m32 254 +run: effective address is 0x60 (EAX) 255 +run: SF=0; ZF=0; OF=0 256 @@ -327,7 +327,7 @@ if ('onhashchange' in window) { 263 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 264 == 0x60 # data segment 265 07 0c 0b 0a # 0x0a0b0c0d -266 +run: compare EBX with r/m32 +266 +run: compare EBX with r/m32 267 +run: effective address is 0x60 (EAX) 268 +run: SF=1; ZF=0; OF=0 269 @@ -340,14 +340,14 @@ if ('onhashchange' in window) { 276 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 277 == 0x60 # data segment 278 0d 0c 0b 0a # 0x0a0b0c0d -279 +run: compare EBX with r/m32 +279 +run: compare EBX with r/m32 280 +run: effective address is 0x60 (EAX) 281 +run: SF=0; ZF=1; OF=0 282 283 //: 284 285 :(before "End Initialize Op Names(name)") -286 put(name, "3b", "set SF if rm32 > r32"); +286 put(name, "3b", "set SF if rm32 > r32"); 287 288 :(scenario compare_r32_with_mem_at_r32_greater) 289 % Reg[EAX].i = 0x60; @@ -358,23 +358,23 @@ if ('onhashchange' in window) { 294 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 295 == 0x60 # data segment 296 07 0c 0b 0a # 0x0a0b0c0d -297 +run: compare r/m32 with EBX +297 +run: compare r/m32 with EBX 298 +run: effective address is 0x60 (EAX) 299 +run: SF=0; ZF=0; OF=0 300 301 :(before "End Single-Byte Opcodes") 302 case 0x3b: { // set SF if r32 < r/m32 -303 uint8_t modrm = next(); +303 uint8_t modrm = 
next(); 304 uint8_t reg1 = (modrm>>3)&0x7; -305 trace(90, "run") << "compare r/m32 with " << rname(reg1) << end(); +305 trace(90, "run") << "compare r/m32 with " << rname(reg1) << end(); 306 int32_t arg1 = Reg[reg1].i; 307 int32_t* arg2 = effective_address(modrm); 308 int32_t tmp1 = arg1 - *arg2; 309 SF = (tmp1 < 0); 310 ZF = (tmp1 == 0); 311 int64_t tmp2 = arg1 - *arg2; -312 OF = (tmp1 != tmp2); -313 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); +312 OF = (tmp1 != tmp2); +313 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); 314 break; 315 } 316 @@ -387,7 +387,7 @@ if ('onhashchange' in window) { 323 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 324 == 0x60 # data segment 325 0d 0c 0b 0a # 0x0a0b0c0d -326 +run: compare r/m32 with EBX +326 +run: compare r/m32 with EBX 327 +run: effective address is 0x60 (EAX) 328 +run: SF=1; ZF=0; OF=0 329 @@ -400,7 +400,7 @@ if ('onhashchange' in window) { 336 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 337 == 0x60 # data segment 338 0d 0c 0b 0a # 0x0a0b0c0d -339 +run: compare r/m32 with EBX +339 +run: compare r/m32 with EBX 340 +run: effective address is 0x60 (EAX) 341 +run: SF=0; ZF=1; OF=0 342 @@ -413,14 +413,14 @@ if ('onhashchange' in window) { 349 # op ModR/M SIB displacement immediate 350 89 18 # copy EBX to *EAX 351 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) -352 +run: copy EBX to r/m32 +352 +run: copy EBX to r/m32 353 +run: effective address is 0x60 (EAX) 354 +run: storing 0x000000af 355 356 //: 357 358 :(before "End Initialize Op Names(name)") -359 put(name, "8b", "copy rm32 to r32"); +359 put(name, "8b", "copy rm32 to r32"); 360 361 :(scenario copy_mem_at_r32_to_r32) 362 % Reg[EAX].i = 0x60; @@ -430,25 +430,25 @@ if ('onhashchange' in window) { 366 # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) 367 == 0x60 # data segment 368 af 00 00 00 # 0xaf -369 +run: copy r/m32 to EBX 
+369 +run: copy r/m32 to EBX 370 +run: effective address is 0x60 (EAX) 371 +run: storing 0x000000af 372 373 :(before "End Single-Byte Opcodes") 374 case 0x8b: { // copy r32 to r/m32 -375 uint8_t modrm = next(); +375 uint8_t modrm = next(); 376 uint8_t reg1 = (modrm>>3)&0x7; -377 trace(90, "run") << "copy r/m32 to " << rname(reg1) << end(); +377 trace(90, "run") << "copy r/m32 to " << rname(reg1) << end(); 378 int32_t* arg2 = effective_address(modrm); 379 Reg[reg1].i = *arg2; -380 trace(90, "run") << "storing 0x" << HEXWORD << *arg2 << end(); +380 trace(90, "run") << "storing 0x" << HEXWORD << *arg2 << end(); 381 break; 382 } 383 384 //:: jump 385 386 :(before "End Initialize Op Names(name)") -387 put(name, "ff", "jump/push/call rm32 based on subop"); +387 put(name, "ff", "jump/push/call rm32 based on subop"); 388 389 :(scenario jump_mem_at_r32) 390 % Reg[EAX].i = 0x60; @@ -469,14 +469,14 @@ if ('onhashchange' in window) { 405 406 :(before "End Single-Byte Opcodes") 407 case 0xff: { -408 uint8_t modrm = next(); -409 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits -410 switch (subop) { +408 uint8_t modrm = next(); +409 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits +410 switch (subop) { 411 case 4: { // jump to r/m32 -412 trace(90, "run") << "jump to r/m32" << end(); +412 trace(90, "run") << "jump to r/m32" << end(); 413 int32_t* arg2 = effective_address(modrm); -414 EIP = *arg2; -415 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); +414 EIP = *arg2; +415 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); 416 break; 417 } 418 // End Op ff Subops @@ -497,12 +497,12 @@ if ('onhashchange' in window) { 433 af 00 00 00 # 0xaf 434 +run: push r/m32 435 +run: effective address is 0x60 (EAX) -436 +run: decrementing ESP to 0x00000010 +436 +run: decrementing ESP to 0x00000010 437 +run: pushing value 0x000000af 438 439 :(before "End Op ff Subops") 440 case 6: { // push r/m32 to stack -441 trace(90, "run") << "push r/m32" 
<< end(); +441 trace(90, "run") << "push r/m32" << end(); 442 const int32_t* val = effective_address(modrm); 443 push(*val); 444 break; @@ -511,7 +511,7 @@ if ('onhashchange' in window) { 447 //:: pop 448 449 :(before "End Initialize Op Names(name)") -450 put(name, "8f", "pop top of stack to rm32"); +450 put(name, "8f", "pop top of stack to rm32"); 451 452 :(scenario pop_mem_at_r32) 453 % Reg[EAX].i = 0x60; @@ -525,15 +525,15 @@ if ('onhashchange' in window) { 461 +run: pop into r/m32 462 +run: effective address is 0x60 (EAX) 463 +run: popping value 0x00000030 -464 +run: incrementing ESP to 0x00000014 +464 +run: incrementing ESP to 0x00000014 465 466 :(before "End Single-Byte Opcodes") 467 case 0x8f: { // pop stack into r/m32 -468 uint8_t modrm = next(); -469 uint8_t subop = (modrm>>3)&0x7; -470 switch (subop) { +468 uint8_t modrm = next(); +469 uint8_t subop = (modrm>>3)&0x7; +470 switch (subop) { 471 case 0: { -472 trace(90, "run") << "pop into r/m32" << end(); +472 trace(90, "run") << "pop into r/m32" << end(); 473 int32_t* dest = effective_address(modrm); 474 *dest = pop(); 475 break; @@ -552,14 +552,14 @@ if ('onhashchange' in window) { 488 # ModR/M in binary: 00 (indirect mode) 011 (src EBX) 101 (dest in disp32) 489 == 0x60 # data segment 490 01 00 00 00 # 1 -491 +run: add EBX to r/m32 +491 +run: add EBX to r/m32 492 +run: effective address is 0x60 (disp32) 493 +run: storing 0x00000011 494 495 :(before "End Mod 0 Special-cases(addr)") 496 case 5: // exception: mod 0b00 rm 0b101 => incoming disp32 -497 addr = imm32(); -498 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (disp32)" << end(); +497 addr = imm32(); +498 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (disp32)" << end(); 499 break; 500 501 //: @@ -573,7 +573,7 @@ if ('onhashchange' in window) { 509 # ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 000 (dest EAX) 510 == 0x60 # data segment 511 01 00 00 00 # 1 -512 +run: add EBX to r/m32 +512 
+run: add EBX to r/m32 513 +run: effective address is initially 0x5e (EAX) 514 +run: effective address is 0x60 (after adding disp8) 515 +run: storing 0x00000011 @@ -583,13 +583,13 @@ if ('onhashchange' in window) { 519 switch (rm) { 520 default: 521 addr = Reg[rm].u; -522 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); +522 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); 523 break; 524 // End Mod 1 Special-cases(addr) 525 } 526 if (addr > 0) { -527 addr += static_cast<int8_t>(next()); -528 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding disp8)" << end(); +527 addr += static_cast<int8_t>(next()); +528 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding disp8)" << end(); 529 } 530 break; 531 @@ -602,7 +602,7 @@ if ('onhashchange' in window) { 538 # ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 000 (dest EAX) 539 == 0x60 # data segment 540 01 00 00 00 # 1 -541 +run: add EBX to r/m32 +541 +run: add EBX to r/m32 542 +run: effective address is initially 0x61 (EAX) 543 +run: effective address is 0x60 (after adding disp8) 544 +run: storing 0x00000011 @@ -618,7 +618,7 @@ if ('onhashchange' in window) { 554 # ModR/M in binary: 10 (indirect+disp32 mode) 011 (src EBX) 000 (dest EAX) 555 == 0x60 # data segment 556 01 00 00 00 # 1 -557 +run: add EBX to r/m32 +557 +run: add EBX to r/m32 558 +run: effective address is initially 0x5e (EAX) 559 +run: effective address is 0x60 (after adding disp32) 560 +run: storing 0x00000011 @@ -628,13 +628,13 @@ if ('onhashchange' in window) { 564 switch (rm) { 565 default: 566 addr = Reg[rm].u; -567 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); +567 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); 568 
break; 569 // End Mod 2 Special-cases(addr) 570 } 571 if (addr > 0) { -572 addr += imm32(); -573 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding disp32)" << end(); +572 addr += imm32(); +573 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding disp32)" << end(); 574 } 575 break; 576 @@ -647,7 +647,7 @@ if ('onhashchange' in window) { 583 # ModR/M in binary: 10 (indirect+disp32 mode) 011 (src EBX) 000 (dest EAX) 584 == 0x60 # data segment 585 01 00 00 00 # 1 -586 +run: add EBX to r/m32 +586 +run: add EBX to r/m32 587 +run: effective address is initially 0x61 (EAX) 588 +run: effective address is 0x60 (after adding disp32) 589 +run: storing 0x00000011 diff --git a/html/subx/015immediate_addressing.cc.html b/html/subx/015immediate_addressing.cc.html index c4f63bd9..caac7579 100644 --- a/html/subx/015immediate_addressing.cc.html +++ b/html/subx/015immediate_addressing.cc.html @@ -65,7 +65,7 @@ if ('onhashchange' in window) { 1 //: instructions that (immediately) contain an argument to act with 2 3 :(before "End Initialize Op Names(name)") - 4 put(name, "81", "combine rm32 with imm32 based on subop"); + 4 put(name, "81", "combine rm32 with imm32 based on subop"); 5 6 :(scenario add_imm32_to_r32) 7 % Reg[EBX].i = 1; @@ -73,26 +73,26 @@ if ('onhashchange' in window) { 9 # op ModR/M SIB displacement immediate 10 81 c3 0a 0b 0c 0d # add 0x0d0c0b0a to EBX 11 # ModR/M in binary: 11 (direct mode) 000 (add imm32) 011 (dest EBX) - 12 +run: combine imm32 0x0d0c0b0a with r/m32 - 13 +run: r/m32 is EBX - 14 +run: subop add + 12 +run: combine imm32 0x0d0c0b0a with r/m32 + 13 +run: r/m32 is EBX + 14 +run: subop add 15 +run: storing 0x0d0c0b0b 16 17 :(before "End Single-Byte Opcodes") 18 case 0x81: { // combine imm32 with r/m32 - 19 uint8_t modrm = next(); - 20 int32_t arg2 = imm32(); - 21 trace(90, "run") << "combine imm32 0x" << HEXWORD << arg2 << " with r/m32" << end(); + 19 uint8_t modrm = next(); + 20 int32_t arg2 
= imm32(); + 21 trace(90, "run") << "combine imm32 0x" << HEXWORD << arg2 << " with r/m32" << end(); 22 int32_t* arg1 = effective_address(modrm); - 23 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits - 24 switch (subop) { + 23 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits + 24 switch (subop) { 25 case 0: - 26 trace(90, "run") << "subop add" << end(); - 27 BINARY_ARITHMETIC_OP(+, *arg1, arg2); + 26 trace(90, "run") << "subop add" << end(); + 27 BINARY_ARITHMETIC_OP(+, *arg1, arg2); 28 break; 29 // End Op 81 Subops 30 default: - 31 cerr << "unrecognized sub-opcode after 81: " << NUM(subop) << '\n'; + 31 cerr << "unrecognized sub-opcode after 81: " << NUM(subop) << '\n'; 32 exit(1); 33 } 34 break; @@ -108,29 +108,29 @@ if ('onhashchange' in window) { 44 # ModR/M in binary: 00 (indirect mode) 000 (add imm32) 011 (dest EBX) 45 == 0x60 # data segment 46 01 00 00 00 # 1 - 47 +run: combine imm32 0x0d0c0b0a with r/m32 + 47 +run: combine imm32 0x0d0c0b0a with r/m32 48 +run: effective address is 0x60 (EBX) - 49 +run: subop add + 49 +run: subop add 50 +run: storing 0x0d0c0b0b 51 52 //:: subtract 53 54 :(before "End Initialize Op Names(name)") - 55 put(name, "2d", "subtract imm32 from R0 (EAX)"); + 55 put(name, "2d", "subtract imm32 from R0 (EAX)"); 56 57 :(scenario subtract_imm32_from_eax) 58 % Reg[EAX].i = 0x0d0c0baa; 59 == 0x1 60 # op ModR/M SIB displacement immediate 61 2d 0a 0b 0c 0d # subtract 0x0d0c0b0a from EAX - 62 +run: subtract imm32 0x0d0c0b0a from EAX + 62 +run: subtract imm32 0x0d0c0b0a from EAX 63 +run: storing 0x000000a0 64 65 :(before "End Single-Byte Opcodes") 66 case 0x2d: { // subtract imm32 from EAX - 67 int32_t arg2 = imm32(); - 68 trace(90, "run") << "subtract imm32 0x" << HEXWORD << arg2 << " from EAX" << end(); - 69 BINARY_ARITHMETIC_OP(-, Reg[EAX].i, arg2); + 67 int32_t arg2 = imm32(); + 68 trace(90, "run") << "subtract imm32 0x" << HEXWORD << arg2 << " from EAX" << end(); + 69 BINARY_ARITHMETIC_OP(-, Reg[EAX].i, arg2); 70 
break; 71 } 72 @@ -144,15 +144,15 @@ if ('onhashchange' in window) { 80 # ModR/M in binary: 00 (indirect mode) 101 (subtract imm32) 011 (dest EBX) 81 == 0x60 # data segment 82 0a 00 00 00 # 10 - 83 +run: combine imm32 0x00000001 with r/m32 + 83 +run: combine imm32 0x00000001 with r/m32 84 +run: effective address is 0x60 (EBX) - 85 +run: subop subtract + 85 +run: subop subtract 86 +run: storing 0x00000009 87 88 :(before "End Op 81 Subops") 89 case 5: { - 90 trace(90, "run") << "subop subtract" << end(); - 91 BINARY_ARITHMETIC_OP(-, *arg1, arg2); + 90 trace(90, "run") << "subop subtract" << end(); + 91 BINARY_ARITHMETIC_OP(-, *arg1, arg2); 92 break; 93 } 94 @@ -164,29 +164,29 @@ if ('onhashchange' in window) { 100 # op ModR/M SIB displacement immediate 101 81 eb 01 00 00 00 # subtract 1 from EBX 102 # ModR/M in binary: 11 (direct mode) 101 (subtract imm32) 011 (dest EBX) -103 +run: combine imm32 0x00000001 with r/m32 -104 +run: r/m32 is EBX -105 +run: subop subtract +103 +run: combine imm32 0x00000001 with r/m32 +104 +run: r/m32 is EBX +105 +run: subop subtract 106 +run: storing 0x00000009 107 108 //:: and 109 110 :(before "End Initialize Op Names(name)") -111 put(name, "25", "R0 = bitwise AND of imm32 with R0 (EAX)"); +111 put(name, "25", "R0 = bitwise AND of imm32 with R0 (EAX)"); 112 113 :(scenario and_imm32_with_eax) 114 % Reg[EAX].i = 0xff; 115 == 0x1 116 # op ModR/M SIB displacement immediate 117 25 0a 0b 0c 0d # and 0x0d0c0b0a with EAX -118 +run: and imm32 0x0d0c0b0a with EAX +118 +run: and imm32 0x0d0c0b0a with EAX 119 +run: storing 0x0000000a 120 121 :(before "End Single-Byte Opcodes") 122 case 0x25: { // and imm32 with EAX -123 int32_t arg2 = imm32(); -124 trace(90, "run") << "and imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); -125 BINARY_BITWISE_OP(&, Reg[EAX].i, arg2); +123 int32_t arg2 = imm32(); +124 trace(90, "run") << "and imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); +125 BINARY_BITWISE_OP(&, Reg[EAX].i, arg2); 126 break; 127 } 128 @@ 
-200,15 +200,15 @@ if ('onhashchange' in window) { 136 # ModR/M in binary: 00 (indirect mode) 100 (and imm32) 011 (dest EBX) 137 == 0x60 # data segment 138 ff 00 00 00 # 0xff -139 +run: combine imm32 0x0d0c0b0a with r/m32 +139 +run: combine imm32 0x0d0c0b0a with r/m32 140 +run: effective address is 0x60 (EBX) -141 +run: subop and +141 +run: subop and 142 +run: storing 0x0000000a 143 144 :(before "End Op 81 Subops") 145 case 4: { -146 trace(90, "run") << "subop and" << end(); -147 BINARY_BITWISE_OP(&, *arg1, arg2); +146 trace(90, "run") << "subop and" << end(); +147 BINARY_BITWISE_OP(&, *arg1, arg2); 148 break; 149 } 150 @@ -220,29 +220,29 @@ if ('onhashchange' in window) { 156 # op ModR/M SIB displacement immediate 157 81 e3 0a 0b 0c 0d # and 0x0d0c0b0a with EBX 158 # ModR/M in binary: 11 (direct mode) 100 (and imm32) 011 (dest EBX) -159 +run: combine imm32 0x0d0c0b0a with r/m32 -160 +run: r/m32 is EBX -161 +run: subop and +159 +run: combine imm32 0x0d0c0b0a with r/m32 +160 +run: r/m32 is EBX +161 +run: subop and 162 +run: storing 0x0000000a 163 164 //:: or 165 166 :(before "End Initialize Op Names(name)") -167 put(name, "0d", "R0 = bitwise OR of imm32 with R0 (EAX)"); +167 put(name, "0d", "R0 = bitwise OR of imm32 with R0 (EAX)"); 168 169 :(scenario or_imm32_with_eax) 170 % Reg[EAX].i = 0xd0c0b0a0; 171 == 0x1 172 # op ModR/M SIB displacement immediate 173 0d 0a 0b 0c 0d # or 0x0d0c0b0a with EAX -174 +run: or imm32 0x0d0c0b0a with EAX +174 +run: or imm32 0x0d0c0b0a with EAX 175 +run: storing 0xddccbbaa 176 177 :(before "End Single-Byte Opcodes") 178 case 0x0d: { // or imm32 with EAX -179 int32_t arg2 = imm32(); -180 trace(90, "run") << "or imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); -181 BINARY_BITWISE_OP(|, Reg[EAX].i, arg2); +179 int32_t arg2 = imm32(); +180 trace(90, "run") << "or imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); +181 BINARY_BITWISE_OP(|, Reg[EAX].i, arg2); 182 break; 183 } 184 @@ -256,15 +256,15 @@ if ('onhashchange' in window) { 
192 # ModR/M in binary: 00 (indirect mode) 001 (or imm32) 011 (dest EBX) 193 == 0x60 # data segment 194 a0 b0 c0 d0 # 0xd0c0b0a0 -195 +run: combine imm32 0x0d0c0b0a with r/m32 +195 +run: combine imm32 0x0d0c0b0a with r/m32 196 +run: effective address is 0x60 (EBX) -197 +run: subop or +197 +run: subop or 198 +run: storing 0xddccbbaa 199 200 :(before "End Op 81 Subops") 201 case 1: { -202 trace(90, "run") << "subop or" << end(); -203 BINARY_BITWISE_OP(|, *arg1, arg2); +202 trace(90, "run") << "subop or" << end(); +203 BINARY_BITWISE_OP(|, *arg1, arg2); 204 break; 205 } 206 @@ -274,29 +274,29 @@ if ('onhashchange' in window) { 210 # op ModR/M SIB displacement immediate 211 81 cb 0a 0b 0c 0d # or 0x0d0c0b0a with EBX 212 # ModR/M in binary: 11 (direct mode) 001 (or imm32) 011 (dest EBX) -213 +run: combine imm32 0x0d0c0b0a with r/m32 -214 +run: r/m32 is EBX -215 +run: subop or +213 +run: combine imm32 0x0d0c0b0a with r/m32 +214 +run: r/m32 is EBX +215 +run: subop or 216 +run: storing 0xddccbbaa 217 218 //:: xor 219 220 :(before "End Initialize Op Names(name)") -221 put(name, "35", "R0 = bitwise XOR of imm32 with R0 (EAX)"); +221 put(name, "35", "R0 = bitwise XOR of imm32 with R0 (EAX)"); 222 223 :(scenario xor_imm32_with_eax) 224 % Reg[EAX].i = 0xddccb0a0; 225 == 0x1 226 # op ModR/M SIB displacement immediate 227 35 0a 0b 0c 0d # xor 0x0d0c0b0a with EAX -228 +run: xor imm32 0x0d0c0b0a with EAX +228 +run: xor imm32 0x0d0c0b0a with EAX 229 +run: storing 0xd0c0bbaa 230 231 :(before "End Single-Byte Opcodes") 232 case 0x35: { // xor imm32 with EAX -233 int32_t arg2 = imm32(); -234 trace(90, "run") << "xor imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); -235 BINARY_BITWISE_OP(^, Reg[EAX].i, arg2); +233 int32_t arg2 = imm32(); +234 trace(90, "run") << "xor imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); +235 BINARY_BITWISE_OP(^, Reg[EAX].i, arg2); 236 break; 237 } 238 @@ -310,15 +310,15 @@ if ('onhashchange' in window) { 246 # ModR/M in binary: 00 (indirect mode) 
110 (xor imm32) 011 (dest EBX) 247 == 0x60 # data segment 248 a0 b0 c0 d0 # 0xd0c0b0a0 -249 +run: combine imm32 0x0d0c0b0a with r/m32 +249 +run: combine imm32 0x0d0c0b0a with r/m32 250 +run: effective address is 0x60 (EBX) -251 +run: subop xor +251 +run: subop xor 252 +run: storing 0xddccbbaa 253 254 :(before "End Op 81 Subops") 255 case 6: { -256 trace(90, "run") << "subop xor" << end(); -257 BINARY_BITWISE_OP(^, *arg1, arg2); +256 trace(90, "run") << "subop xor" << end(); +257 BINARY_BITWISE_OP(^, *arg1, arg2); 258 break; 259 } 260 @@ -328,35 +328,35 @@ if ('onhashchange' in window) { 264 # op ModR/M SIB displacement immediate 265 81 f3 0a 0b 0c 0d # xor 0x0d0c0b0a with EBX 266 # ModR/M in binary: 11 (direct mode) 110 (xor imm32) 011 (dest EBX) -267 +run: combine imm32 0x0d0c0b0a with r/m32 -268 +run: r/m32 is EBX -269 +run: subop xor +267 +run: combine imm32 0x0d0c0b0a with r/m32 +268 +run: r/m32 is EBX +269 +run: subop xor 270 +run: storing 0xddccbbaa 271 272 //:: compare (cmp) 273 274 :(before "End Initialize Op Names(name)") -275 put(name, "3d", "subtract imm32 from R0 (EAX)"); +275 put(name, "3d", "subtract imm32 from R0 (EAX)"); 276 277 :(scenario compare_imm32_with_eax_greater) 278 % Reg[EAX].i = 0x0d0c0b0a; 279 == 0x1 280 # op ModR/M SIB displacement immediate 281 3d 07 0b 0c 0d # compare 0x0d0c0b07 with EAX -282 +run: compare EAX and imm32 0x0d0c0b07 +282 +run: compare EAX and imm32 0x0d0c0b07 283 +run: SF=0; ZF=0; OF=0 284 285 :(before "End Single-Byte Opcodes") 286 case 0x3d: { // subtract imm32 from EAX 287 int32_t arg1 = Reg[EAX].i; -288 int32_t arg2 = imm32(); -289 trace(90, "run") << "compare EAX and imm32 0x" << HEXWORD << arg2 << end(); +288 int32_t arg2 = imm32(); +289 trace(90, "run") << "compare EAX and imm32 0x" << HEXWORD << arg2 << end(); 290 int32_t tmp1 = arg1 - arg2; 291 SF = (tmp1 < 0); 292 ZF = (tmp1 == 0); 293 int64_t tmp2 = arg1 - arg2; -294 OF = (tmp1 != tmp2); -295 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF 
<< end(); +294 OF = (tmp1 != tmp2); +295 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); 296 break; 297 } 298 @@ -365,7 +365,7 @@ if ('onhashchange' in window) { 301 == 0x1 302 # op ModR/M SIB displacement immediate 303 3d 0a 0b 0c 0d # compare 0x0d0c0b0a with EAX -304 +run: compare EAX and imm32 0x0d0c0b0a +304 +run: compare EAX and imm32 0x0d0c0b0a 305 +run: SF=1; ZF=0; OF=0 306 307 :(scenario compare_imm32_with_eax_equal) @@ -373,7 +373,7 @@ if ('onhashchange' in window) { 309 == 0x1 310 # op ModR/M SIB displacement immediate 311 3d 0a 0b 0c 0d # compare 0x0d0c0b0a with EAX -312 +run: compare EAX and imm32 0x0d0c0b0a +312 +run: compare EAX and imm32 0x0d0c0b0a 313 +run: SF=0; ZF=1; OF=0 314 315 //: @@ -384,19 +384,19 @@ if ('onhashchange' in window) { 320 # op ModR/M SIB displacement immediate 321 81 fb 07 0b 0c 0d # compare 0x0d0c0b07 with EBX 322 # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX) -323 +run: combine imm32 0x0d0c0b07 with r/m32 -324 +run: r/m32 is EBX +323 +run: combine imm32 0x0d0c0b07 with r/m32 +324 +run: r/m32 is EBX 325 +run: SF=0; ZF=0; OF=0 326 327 :(before "End Op 81 Subops") 328 case 7: { -329 trace(90, "run") << "subop compare" << end(); +329 trace(90, "run") << "subop compare" << end(); 330 int32_t tmp1 = *arg1 - arg2; 331 SF = (tmp1 < 0); 332 ZF = (tmp1 == 0); 333 int64_t tmp2 = *arg1 - arg2; -334 OF = (tmp1 != tmp2); -335 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); +334 OF = (tmp1 != tmp2); +335 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); 336 break; 337 } 338 @@ -406,8 +406,8 @@ if ('onhashchange' in window) { 342 # op ModR/M SIB displacement immediate 343 81 fb 0a 0b 0c 0d # compare 0x0d0c0b0a with EBX 344 # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX) -345 +run: combine imm32 0x0d0c0b0a with r/m32 -346 +run: r/m32 is EBX +345 +run: combine imm32 0x0d0c0b0a with r/m32 +346 +run: r/m32 is EBX 347 
+run: SF=1; ZF=0; OF=0 348 349 :(scenario compare_imm32_with_r32_equal) @@ -416,8 +416,8 @@ if ('onhashchange' in window) { 352 # op ModR/M SIB displacement immediate 353 81 fb 0a 0b 0c 0d # compare 0x0d0c0b0a with EBX 354 # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX) -355 +run: combine imm32 0x0d0c0b0a with r/m32 -356 +run: r/m32 is EBX +355 +run: combine imm32 0x0d0c0b0a with r/m32 +356 +run: r/m32 is EBX 357 +run: SF=0; ZF=1; OF=0 358 359 :(scenario compare_imm32_with_mem_at_r32_greater) @@ -428,7 +428,7 @@ if ('onhashchange' in window) { 364 # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX) 365 == 0x60 # data segment 366 0a 0b 0c 0d # 0x0d0c0b0a -367 +run: combine imm32 0x0d0c0b07 with r/m32 +367 +run: combine imm32 0x0d0c0b07 with r/m32 368 +run: effective address is 0x60 (EBX) 369 +run: SF=0; ZF=0; OF=0 370 @@ -440,7 +440,7 @@ if ('onhashchange' in window) { 376 # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX) 377 == 0x60 # data segment 378 07 0b 0c 0d # 0x0d0c0b07 -379 +run: combine imm32 0x0d0c0b0a with r/m32 +379 +run: combine imm32 0x0d0c0b0a with r/m32 380 +run: effective address is 0x60 (EBX) 381 +run: SF=1; ZF=0; OF=0 382 @@ -453,27 +453,27 @@ if ('onhashchange' in window) { 389 # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX) 390 == 0x60 # data segment 391 0a 0b 0c 0d # 0x0d0c0b0a -392 +run: combine imm32 0x0d0c0b0a with r/m32 +392 +run: combine imm32 0x0d0c0b0a with r/m32 393 +run: effective address is 0x60 (EBX) 394 +run: SF=0; ZF=1; OF=0 395 396 //:: copy (mov) 397 398 :(before "End Initialize Op Names(name)") -399 put(name, "b8", "copy imm32 to R0 (EAX)"); -400 put(name, "b9", "copy imm32 to R1 (ECX)"); -401 put(name, "ba", "copy imm32 to R2 (EDX)"); -402 put(name, "bb", "copy imm32 to R3 (EBX)"); -403 put(name, "bc", "copy imm32 to R4 (ESP)"); -404 put(name, "bd", "copy imm32 to R5 (EBP)"); -405 put(name, "be", "copy imm32 to R6 (ESI)"); -406 
put(name, "bf", "copy imm32 to R7 (EDI)"); +399 put(name, "b8", "copy imm32 to R0 (EAX)"); +400 put(name, "b9", "copy imm32 to R1 (ECX)"); +401 put(name, "ba", "copy imm32 to R2 (EDX)"); +402 put(name, "bb", "copy imm32 to R3 (EBX)"); +403 put(name, "bc", "copy imm32 to R4 (ESP)"); +404 put(name, "bd", "copy imm32 to R5 (EBP)"); +405 put(name, "be", "copy imm32 to R6 (ESI)"); +406 put(name, "bf", "copy imm32 to R7 (EDI)"); 407 408 :(scenario copy_imm32_to_r32) 409 == 0x1 410 # op ModR/M SIB displacement immediate 411 bb 0a 0b 0c 0d # copy 0x0d0c0b0a to EBX -412 +run: copy imm32 0x0d0c0b0a to EBX +412 +run: copy imm32 0x0d0c0b0a to EBX 413 414 :(before "End Single-Byte Opcodes") 415 case 0xb8: @@ -485,8 +485,8 @@ if ('onhashchange' in window) { 421 case 0xbe: 422 case 0xbf: { // copy imm32 to r32 423 uint8_t reg1 = op & 0x7; -424 int32_t arg2 = imm32(); -425 trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to " << rname(reg1) << end(); +424 int32_t arg2 = imm32(); +425 trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to " << rname(reg1) << end(); 426 Reg[reg1].i = arg2; 427 break; 428 } @@ -494,7 +494,7 @@ if ('onhashchange' in window) { 430 //: 431 432 :(before "End Initialize Op Names(name)") -433 put(name, "c7", "copy imm32 to rm32"); +433 put(name, "c7", "copy imm32 to rm32"); 434 435 :(scenario copy_imm32_to_mem_at_r32) 436 % Reg[EBX].i = 0x60; @@ -502,14 +502,14 @@ if ('onhashchange' in window) { 438 # op ModR/M SIB displacement immediate 439 c7 03 0a 0b 0c 0d # copy 0x0d0c0b0a to *EBX 440 # ModR/M in binary: 00 (indirect mode) 000 (unused) 011 (dest EBX) -441 +run: copy imm32 0x0d0c0b0a to r/m32 +441 +run: copy imm32 0x0d0c0b0a to r/m32 442 +run: effective address is 0x60 (EBX) 443 444 :(before "End Single-Byte Opcodes") 445 case 0xc7: { // copy imm32 to r32 -446 uint8_t modrm = next(); -447 int32_t arg2 = imm32(); -448 trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to r/m32" << end(); +446 uint8_t modrm = next(); +447 
int32_t arg2 = imm32(); +448 trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to r/m32" << end(); 449 int32_t* arg1 = effective_address(modrm); 450 *arg1 = arg2; 451 break; @@ -518,25 +518,25 @@ if ('onhashchange' in window) { 454 //:: push 455 456 :(before "End Initialize Op Names(name)") -457 put(name, "68", "push imm32 to stack"); +457 put(name, "68", "push imm32 to stack"); 458 459 :(scenario push_imm32) 460 % Reg[ESP].u = 0x14; 461 == 0x1 462 # op ModR/M SIB displacement immediate 463 68 af 00 00 00 # push *EAX to stack -464 +run: push imm32 0x000000af -465 +run: ESP is now 0x00000010 -466 +run: contents at ESP: 0x000000af +464 +run: push imm32 0x000000af +465 +run: ESP is now 0x00000010 +466 +run: contents at ESP: 0x000000af 467 468 :(before "End Single-Byte Opcodes") 469 case 0x68: { -470 uint32_t val = static_cast<uint32_t>(imm32()); -471 trace(90, "run") << "push imm32 0x" << HEXWORD << val << end(); +470 uint32_t val = static_cast<uint32_t>(imm32()); +471 trace(90, "run") << "push imm32 0x" << HEXWORD << val << end(); 472 //? 
cerr << "push: " << val << " => " << Reg[ESP].u << '\n'; 473 push(val); -474 trace(90, "run") << "ESP is now 0x" << HEXWORD << Reg[ESP].u << end(); -475 trace(90, "run") << "contents at ESP: 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << end(); +474 trace(90, "run") << "ESP is now 0x" << HEXWORD << Reg[ESP].u << end(); +475 trace(90, "run") << "contents at ESP: 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << end(); 476 break; 477 } diff --git a/html/subx/016index_addressing.cc.html b/html/subx/016index_addressing.cc.html index 1fe7ab8f..275dee17 100644 --- a/html/subx/016index_addressing.cc.html +++ b/html/subx/016index_addressing.cc.html @@ -72,7 +72,7 @@ if ('onhashchange' in window) { 10 # SIB in binary: 00 (scale 1) 100 (no index) 000 (base EAX) 11 == 0x60 # data segment 12 01 00 00 00 # 1 - 13 +run: add EBX to r/m32 + 13 +run: add EBX to r/m32 14 +run: effective address is initially 0x60 (EAX) 15 +run: effective address is 0x60 16 +run: storing 0x00000011 @@ -83,27 +83,27 @@ if ('onhashchange' in window) { 21 break; 22 :(code) 23 uint32_t effective_address_from_sib(uint8_t mod) { - 24 uint8_t sib = next(); + 24 uint8_t sib = next(); 25 uint8_t base = sib&0x7; 26 uint32_t addr = 0; - 27 if (base != EBP || mod != 0) { + 27 if (base != EBP || mod != 0) { 28 addr = Reg[base].u; - 29 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(base) << ")" << end(); + 29 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(base) << ")" << end(); 30 } 31 else { 32 // base == EBP && mod == 0 - 33 addr = imm32(); // ignore base - 34 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (disp32)" << end(); + 33 addr = imm32(); // ignore base + 34 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (disp32)" << end(); 35 } 36 uint8_t index = (sib>>3)&0x7; - 37 if (index == ESP) { + 37 if (index == ESP) { 38 // ignore index and scale - 39 trace(90, 
"run") << "effective address is 0x" << std::hex << addr << end(); + 39 trace(90, "run") << "effective address is 0x" << std::hex << addr << end(); 40 } 41 else { 42 uint8_t scale = (1 << (sib>>6)); 43 addr += Reg[index].i*scale; // treat index register as signed. Maybe base as well? But we'll always ensure it's non-negative. - 44 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding " << rname(index) << "*" << NUM(scale) << ")" << end(); + 44 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding " << rname(index) << "*" << NUM(scale) << ")" << end(); 45 } 46 return addr; 47 } @@ -119,7 +119,7 @@ if ('onhashchange' in window) { 57 # SIB in binary: 00 (scale 1) 001 (index ECX) 000 (base EAX) 58 == 0x60 # data segment 59 01 00 00 00 # 1 - 60 +run: add EBX to r/m32 + 60 +run: add EBX to r/m32 61 +run: effective address is initially 0x5e (EAX) 62 +run: effective address is 0x60 (after adding ECX*1) 63 +run: storing 0x00000011 @@ -133,7 +133,7 @@ if ('onhashchange' in window) { 71 # SIB in binary: 00 (scale 1) 100 (no index) 101 (not EBP but disp32) 72 == 0x60 # data segment 73 01 00 00 00 # 1 - 74 +run: add EBX to r/m32 + 74 +run: add EBX to r/m32 75 +run: effective address is initially 0x60 (disp32) 76 +run: effective address is 0x60 77 +run: storing 0x00000011 @@ -151,7 +151,7 @@ if ('onhashchange' in window) { 89 # SIB in binary: 00 (scale 1) 001 (index ECX) 000 (base EAX) 90 == 0x60 # data segment 91 01 00 00 00 # 1 - 92 +run: add EBX to r/m32 + 92 +run: add EBX to r/m32 93 +run: effective address is initially 0x59 (EAX) 94 +run: effective address is 0x5e (after adding ECX*1) 95 +run: effective address is 0x60 (after adding disp8) @@ -175,7 +175,7 @@ if ('onhashchange' in window) { 113 # SIB in binary: 00 (scale 1) 001 (index ECX) 000 (base EAX) 114 == 0x60 # data segment 115 01 00 00 00 # 1 -116 +run: add EBX to r/m32 +116 +run: add EBX to r/m32 117 +run: effective address is initially 0x59 (EAX) 118 
+run: effective address is 0x5e (after adding ECX*1) 119 +run: effective address is 0x60 (after adding disp32) diff --git a/html/subx/017jump_disp8.cc.html b/html/subx/017jump_disp8.cc.html index fddb1da6..a824a5f3 100644 --- a/html/subx/017jump_disp8.cc.html +++ b/html/subx/017jump_disp8.cc.html @@ -66,7 +66,7 @@ if ('onhashchange' in window) { 3 //:: jump 4 5 :(before "End Initialize Op Names(name)") - 6 put(name, "eb", "jump disp8 bytes away"); + 6 put(name, "eb", "jump disp8 bytes away"); 7 8 :(scenario jump_rel8) 9 == 0x1 @@ -81,16 +81,16 @@ if ('onhashchange' in window) { 18 19 :(before "End Single-Byte Opcodes") 20 case 0xeb: { // jump rel8 - 21 int8_t offset = static_cast<int>(next()); - 22 trace(90, "run") << "jump " << NUM(offset) << end(); - 23 EIP += offset; + 21 int8_t offset = static_cast<int>(next()); + 22 trace(90, "run") << "jump " << NUM(offset) << end(); + 23 EIP += offset; 24 break; 25 } 26 27 //:: jump if equal/zero 28 29 :(before "End Initialize Op Names(name)") - 30 put(name, "74", "jump disp8 bytes away if ZF is set"); + 30 put(name, "74", "jump disp8 bytes away if ZF is set"); 31 32 :(scenario je_rel8_success) 33 % ZF = true; @@ -106,10 +106,10 @@ if ('onhashchange' in window) { 43 44 :(before "End Single-Byte Opcodes") 45 case 0x74: { // jump rel8 if ZF - 46 int8_t offset = static_cast<int>(next()); + 46 int8_t offset = static_cast<int>(next()); 47 if (ZF) { - 48 trace(90, "run") << "jump " << NUM(offset) << end(); - 49 EIP += offset; + 48 trace(90, "run") << "jump " << NUM(offset) << end(); + 49 EIP += offset; 50 } 51 break; 52 } @@ -129,7 +129,7 @@ if ('onhashchange' in window) { 66 //:: jump if not equal/not zero 67 68 :(before "End Initialize Op Names(name)") - 69 put(name, "75", "jump disp8 bytes away if ZF is not set"); + 69 put(name, "75", "jump disp8 bytes away if ZF is not set"); 70 71 :(scenario jne_rel8_success) 72 % ZF = false; @@ -145,10 +145,10 @@ if ('onhashchange' in window) { 82 83 :(before "End Single-Byte Opcodes") 84 
case 0x75: { // jump rel8 unless ZF - 85 int8_t offset = static_cast<int>(next()); + 85 int8_t offset = static_cast<int>(next()); 86 if (!ZF) { - 87 trace(90, "run") << "jump " << NUM(offset) << end(); - 88 EIP += offset; + 87 trace(90, "run") << "jump " << NUM(offset) << end(); + 88 EIP += offset; 89 } 90 break; 91 } @@ -168,12 +168,12 @@ if ('onhashchange' in window) { 105 //:: jump if greater 106 107 :(before "End Initialize Op Names(name)") -108 put(name, "7f", "jump disp8 bytes away if greater (ZF is unset, SF == OF)"); +108 put(name, "7f", "jump disp8 bytes away if greater (ZF is unset, SF == OF)"); 109 110 :(scenario jg_rel8_success) 111 % ZF = false; 112 % SF = false; -113 % OF = false; +113 % OF = false; 114 == 0x1 115 # op ModR/M SIB displacement immediate 116 7f 05 # skip 1 instruction @@ -186,10 +186,10 @@ if ('onhashchange' in window) { 123 124 :(before "End Single-Byte Opcodes") 125 case 0x7f: { // jump rel8 if !SF and !ZF -126 int8_t offset = static_cast<int>(next()); -127 if (!ZF && SF == OF) { -128 trace(90, "run") << "jump " << NUM(offset) << end(); -129 EIP += offset; +126 int8_t offset = static_cast<int>(next()); +127 if (!ZF && SF == OF) { +128 trace(90, "run") << "jump " << NUM(offset) << end(); +129 EIP += offset; 130 } 131 break; 132 } @@ -197,7 +197,7 @@ if ('onhashchange' in window) { 134 :(scenario jg_rel8_fail) 135 % ZF = false; 136 % SF = true; -137 % OF = false; +137 % OF = false; 138 == 0x1 139 # op ModR/M SIB displacement immediate 140 7f 05 # skip 1 instruction @@ -211,11 +211,11 @@ if ('onhashchange' in window) { 148 //:: jump if greater or equal 149 150 :(before "End Initialize Op Names(name)") -151 put(name, "7d", "jump disp8 bytes away if greater or equal (SF == OF)"); +151 put(name, "7d", "jump disp8 bytes away if greater or equal (SF == OF)"); 152 153 :(scenario jge_rel8_success) 154 % SF = false; -155 % OF = false; +155 % OF = false; 156 == 0x1 157 # op ModR/M SIB displacement immediate 158 7d 05 # skip 1 instruction @@ 
-228,17 +228,17 @@ if ('onhashchange' in window) { 165 166 :(before "End Single-Byte Opcodes") 167 case 0x7d: { // jump rel8 if !SF -168 int8_t offset = static_cast<int>(next()); -169 if (SF == OF) { -170 trace(90, "run") << "jump " << NUM(offset) << end(); -171 EIP += offset; +168 int8_t offset = static_cast<int>(next()); +169 if (SF == OF) { +170 trace(90, "run") << "jump " << NUM(offset) << end(); +171 EIP += offset; 172 } 173 break; 174 } 175 176 :(scenario jge_rel8_fail) 177 % SF = true; -178 % OF = false; +178 % OF = false; 179 == 0x1 180 # op ModR/M SIB displacement immediate 181 7d 05 # skip 1 instruction @@ -252,12 +252,12 @@ if ('onhashchange' in window) { 189 //:: jump if lesser 190 191 :(before "End Initialize Op Names(name)") -192 put(name, "7c", "jump disp8 bytes away if lesser (SF != OF)"); +192 put(name, "7c", "jump disp8 bytes away if lesser (SF != OF)"); 193 194 :(scenario jl_rel8_success) 195 % ZF = false; 196 % SF = true; -197 % OF = false; +197 % OF = false; 198 == 0x1 199 # op ModR/M SIB displacement immediate 200 7c 05 # skip 1 instruction @@ -270,10 +270,10 @@ if ('onhashchange' in window) { 207 208 :(before "End Single-Byte Opcodes") 209 case 0x7c: { // jump rel8 if SF and !ZF -210 int8_t offset = static_cast<int>(next()); -211 if (SF != OF) { -212 trace(90, "run") << "jump " << NUM(offset) << end(); -213 EIP += offset; +210 int8_t offset = static_cast<int>(next()); +211 if (SF != OF) { +212 trace(90, "run") << "jump " << NUM(offset) << end(); +213 EIP += offset; 214 } 215 break; 216 } @@ -281,7 +281,7 @@ if ('onhashchange' in window) { 218 :(scenario jl_rel8_fail) 219 % ZF = false; 220 % SF = false; -221 % OF = false; +221 % OF = false; 222 == 0x1 223 # op ModR/M SIB displacement immediate 224 7c 05 # skip 1 instruction @@ -295,12 +295,12 @@ if ('onhashchange' in window) { 232 //:: jump if lesser or equal 233 234 :(before "End Initialize Op Names(name)") -235 put(name, "7e", "jump disp8 bytes away if lesser or equal (ZF is set or SF != 
OF)"); +235 put(name, "7e", "jump disp8 bytes away if lesser or equal (ZF is set or SF != OF)"); 236 237 :(scenario jle_rel8_equal) 238 % ZF = true; 239 % SF = false; -240 % OF = false; +240 % OF = false; 241 == 0x1 242 # op ModR/M SIB displacement immediate 243 7e 05 # skip 1 instruction @@ -314,7 +314,7 @@ if ('onhashchange' in window) { 251 :(scenario jle_rel8_lesser) 252 % ZF = false; 253 % SF = true; -254 % OF = false; +254 % OF = false; 255 == 0x1 256 # op ModR/M SIB displacement immediate 257 7e 05 # skip 1 instruction @@ -327,10 +327,10 @@ if ('onhashchange' in window) { 264 265 :(before "End Single-Byte Opcodes") 266 case 0x7e: { // jump rel8 if SF or ZF -267 int8_t offset = static_cast<int>(next()); -268 if (ZF || SF != OF) { -269 trace(90, "run") << "jump " << NUM(offset) << end(); -270 EIP += offset; +267 int8_t offset = static_cast<int>(next()); +268 if (ZF || SF != OF) { +269 trace(90, "run") << "jump " << NUM(offset) << end(); +270 EIP += offset; 271 } 272 break; 273 } @@ -338,7 +338,7 @@ if ('onhashchange' in window) { 275 :(scenario jle_rel8_greater) 276 % ZF = false; 277 % SF = false; -278 % OF = false; +278 % OF = false; 279 == 0x1 280 # op ModR/M SIB displacement immediate 281 7e 05 # skip 1 instruction diff --git a/html/subx/018jump_disp16.cc.html b/html/subx/018jump_disp16.cc.html index b4e62f89..31ad160f 100644 --- a/html/subx/018jump_disp16.cc.html +++ b/html/subx/018jump_disp16.cc.html @@ -66,7 +66,7 @@ if ('onhashchange' in window) { 3 //:: jump 4 5 :(before "End Initialize Op Names(name)") - 6 put(name, "e9", "jump disp16 bytes away"); + 6 put(name, "e9", "jump disp16 bytes away"); 7 8 :(scenario jump_rel16) 9 == 0x1 @@ -82,21 +82,21 @@ if ('onhashchange' in window) { 19 :(before "End Single-Byte Opcodes") 20 case 0xe9: { // jump rel8 21 int16_t offset = imm16(); - 22 trace(90, "run") << "jump " << offset << end(); - 23 EIP += offset; + 22 trace(90, "run") << "jump " << offset << end(); + 23 EIP += offset; 24 break; 25 } 26 :(code) 27 
int16_t imm16() { - 28 int16_t result = next(); - 29 result |= (next()<<8); + 28 int16_t result = next(); + 29 result |= (next()<<8); 30 return result; 31 } 32 33 //:: jump if equal/zero 34 35 :(before "End Initialize Op Names(name)") - 36 put(name_0f, "84", "jump disp16 bytes away if ZF is set"); + 36 put(name_0f, "84", "jump disp16 bytes away if ZF is set"); 37 38 :(scenario je_rel16_success) 39 % ZF = true; @@ -114,8 +114,8 @@ if ('onhashchange' in window) { 51 case 0x84: { // jump rel16 if ZF 52 int8_t offset = imm16(); 53 if (ZF) { - 54 trace(90, "run") << "jump " << NUM(offset) << end(); - 55 EIP += offset; + 54 trace(90, "run") << "jump " << NUM(offset) << end(); + 55 EIP += offset; 56 } 57 break; 58 } @@ -135,7 +135,7 @@ if ('onhashchange' in window) { 72 //:: jump if not equal/not zero 73 74 :(before "End Initialize Op Names(name)") - 75 put(name_0f, "85", "jump disp16 bytes away if ZF is not set"); + 75 put(name_0f, "85", "jump disp16 bytes away if ZF is not set"); 76 77 :(scenario jne_rel16_success) 78 % ZF = false; @@ -153,8 +153,8 @@ if ('onhashchange' in window) { 90 case 0x85: { // jump rel16 unless ZF 91 int8_t offset = imm16(); 92 if (!ZF) { - 93 trace(90, "run") << "jump " << NUM(offset) << end(); - 94 EIP += offset; + 93 trace(90, "run") << "jump " << NUM(offset) << end(); + 94 EIP += offset; 95 } 96 break; 97 } @@ -174,12 +174,12 @@ if ('onhashchange' in window) { 111 //:: jump if greater 112 113 :(before "End Initialize Op Names(name)") -114 put(name_0f, "8f", "jump disp16 bytes away if greater (ZF is unset, SF == OF)"); +114 put(name_0f, "8f", "jump disp16 bytes away if greater (ZF is unset, SF == OF)"); 115 116 :(scenario jg_rel16_success) 117 % ZF = false; 118 % SF = false; -119 % OF = false; +119 % OF = false; 120 == 0x1 121 # op ModR/M SIB displacement immediate 122 0f 8f 05 00 # skip 1 instruction @@ -193,9 +193,9 @@ if ('onhashchange' in window) { 130 :(before "End Two-Byte Opcodes Starting With 0f") 131 case 0x8f: { // jump rel16 if !SF 
and !ZF 132 int8_t offset = imm16(); -133 if (!ZF && SF == OF) { -134 trace(90, "run") << "jump " << NUM(offset) << end(); -135 EIP += offset; +133 if (!ZF && SF == OF) { +134 trace(90, "run") << "jump " << NUM(offset) << end(); +135 EIP += offset; 136 } 137 break; 138 } @@ -203,7 +203,7 @@ if ('onhashchange' in window) { 140 :(scenario jg_rel16_fail) 141 % ZF = false; 142 % SF = true; -143 % OF = false; +143 % OF = false; 144 == 0x1 145 # op ModR/M SIB displacement immediate 146 0f 8f 05 00 # skip 1 instruction @@ -217,11 +217,11 @@ if ('onhashchange' in window) { 154 //:: jump if greater or equal 155 156 :(before "End Initialize Op Names(name)") -157 put(name_0f, "8d", "jump disp16 bytes away if greater or equal (SF == OF)"); +157 put(name_0f, "8d", "jump disp16 bytes away if greater or equal (SF == OF)"); 158 159 :(scenario jge_rel16_success) 160 % SF = false; -161 % OF = false; +161 % OF = false; 162 == 0x1 163 # op ModR/M SIB displacement immediate 164 0f 8d 05 00 # skip 1 instruction @@ -235,16 +235,16 @@ if ('onhashchange' in window) { 172 :(before "End Two-Byte Opcodes Starting With 0f") 173 case 0x8d: { // jump rel16 if !SF 174 int8_t offset = imm16(); -175 if (SF == OF) { -176 trace(90, "run") << "jump " << NUM(offset) << end(); -177 EIP += offset; +175 if (SF == OF) { +176 trace(90, "run") << "jump " << NUM(offset) << end(); +177 EIP += offset; 178 } 179 break; 180 } 181 182 :(scenario jge_rel16_fail) 183 % SF = true; -184 % OF = false; +184 % OF = false; 185 == 0x1 186 # op ModR/M SIB displacement immediate 187 0f 8d 05 00 # skip 1 instruction @@ -258,12 +258,12 @@ if ('onhashchange' in window) { 195 //:: jump if lesser 196 197 :(before "End Initialize Op Names(name)") -198 put(name_0f, "8c", "jump disp16 bytes away if lesser (SF != OF)"); +198 put(name_0f, "8c", "jump disp16 bytes away if lesser (SF != OF)"); 199 200 :(scenario jl_rel16_success) 201 % ZF = false; 202 % SF = true; -203 % OF = false; +203 % OF = false; 204 == 0x1 205 # op ModR/M SIB 
displacement immediate 206 0f 8c 05 00 # skip 1 instruction @@ -277,9 +277,9 @@ if ('onhashchange' in window) { 214 :(before "End Two-Byte Opcodes Starting With 0f") 215 case 0x8c: { // jump rel16 if SF and !ZF 216 int8_t offset = imm16(); -217 if (SF != OF) { -218 trace(90, "run") << "jump " << NUM(offset) << end(); -219 EIP += offset; +217 if (SF != OF) { +218 trace(90, "run") << "jump " << NUM(offset) << end(); +219 EIP += offset; 220 } 221 break; 222 } @@ -287,7 +287,7 @@ if ('onhashchange' in window) { 224 :(scenario jl_rel16_fail) 225 % ZF = false; 226 % SF = false; -227 % OF = false; +227 % OF = false; 228 == 0x1 229 # op ModR/M SIB displacement immediate 230 0f 8c 05 00 # skip 1 instruction @@ -301,12 +301,12 @@ if ('onhashchange' in window) { 238 //:: jump if lesser or equal 239 240 :(before "End Initialize Op Names(name)") -241 put(name_0f, "8e", "jump disp16 bytes away if lesser or equal (ZF is set or SF != OF)"); +241 put(name_0f, "8e", "jump disp16 bytes away if lesser or equal (ZF is set or SF != OF)"); 242 243 :(scenario jle_rel16_equal) 244 % ZF = true; 245 % SF = false; -246 % OF = false; +246 % OF = false; 247 == 0x1 248 # op ModR/M SIB displacement immediate 249 0f 8e 05 00 # skip 1 instruction @@ -320,7 +320,7 @@ if ('onhashchange' in window) { 257 :(scenario jle_rel16_lesser) 258 % ZF = false; 259 % SF = true; -260 % OF = false; +260 % OF = false; 261 == 0x1 262 # op ModR/M SIB displacement immediate 263 0f 8e 05 00 # skip 1 instruction @@ -334,9 +334,9 @@ if ('onhashchange' in window) { 271 :(before "End Two-Byte Opcodes Starting With 0f") 272 case 0x8e: { // jump rel16 if SF or ZF 273 int8_t offset = imm16(); -274 if (ZF || SF != OF) { -275 trace(90, "run") << "jump " << NUM(offset) << end(); -276 EIP += offset; +274 if (ZF || SF != OF) { +275 trace(90, "run") << "jump " << NUM(offset) << end(); +276 EIP += offset; 277 } 278 break; 279 } @@ -344,7 +344,7 @@ if ('onhashchange' in window) { 281 :(scenario jle_rel16_greater) 282 % ZF = false; 
283 % SF = false; -284 % OF = false; +284 % OF = false; 285 == 0x1 286 # op ModR/M SIB displacement immediate 287 0f 8e 05 00 # skip 1 instruction diff --git a/html/subx/019functions.cc.html b/html/subx/019functions.cc.html index a45b9ddb..22830fed 100644 --- a/html/subx/019functions.cc.html +++ b/html/subx/019functions.cc.html @@ -64,7 +64,7 @@ if ('onhashchange' in window) { 1 //:: call 2 3 :(before "End Initialize Op Names(name)") - 4 put(name, "e8", "call disp32"); + 4 put(name, "e8", "call disp32"); 5 6 :(scenario call_disp32) 7 % Reg[ESP].u = 0x64; @@ -72,19 +72,19 @@ if ('onhashchange' in window) { 9 # op ModR/M SIB displacement immediate 10 e8 a0 00 00 00 # call function offset at 0x000000a0 11 # next EIP is 6 -12 +run: call imm32 0x000000a0 -13 +run: decrementing ESP to 0x00000060 +12 +run: call imm32 0x000000a0 +13 +run: decrementing ESP to 0x00000060 14 +run: pushing value 0x00000006 15 +run: jumping to 0x000000a6 16 17 :(before "End Single-Byte Opcodes") 18 case 0xe8: { // call disp32 relative to next EIP -19 int32_t offset = imm32(); -20 trace(90, "run") << "call imm32 0x" << HEXWORD << offset << end(); +19 int32_t offset = imm32(); +20 trace(90, "run") << "call imm32 0x" << HEXWORD << offset << end(); 21 //? 
cerr << "push: EIP: " << EIP << " => " << Reg[ESP].u << '\n'; -22 push(EIP); -23 EIP += offset; -24 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); +22 push(EIP); +23 EIP += offset; +24 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); 25 break; 26 } 27 @@ -98,18 +98,18 @@ if ('onhashchange' in window) { 35 ff d3 # call function offset at EBX 36 # next EIP is 3 37 +run: call to r/m32 -38 +run: r/m32 is EBX -39 +run: decrementing ESP to 0x00000060 +38 +run: r/m32 is EBX +39 +run: decrementing ESP to 0x00000060 40 +run: pushing value 0x00000003 41 +run: jumping to 0x000000a3 42 43 :(before "End Op ff Subops") 44 case 2: { // call function pointer at r/m32 -45 trace(90, "run") << "call to r/m32" << end(); +45 trace(90, "run") << "call to r/m32" << end(); 46 int32_t* offset = effective_address(modrm); -47 push(EIP); -48 EIP += *offset; -49 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); +47 push(EIP); +48 EIP += *offset; +49 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); 50 break; 51 } 52 @@ -124,14 +124,14 @@ if ('onhashchange' in window) { 61 a0 00 00 00 # 0xa0 62 +run: call to r/m32 63 +run: effective address is 0x10 (EBX) -64 +run: decrementing ESP to 0x00000060 +64 +run: decrementing ESP to 0x00000060 65 +run: pushing value 0x00000003 66 +run: jumping to 0x000000a3 67 68 //:: ret 69 70 :(before "End Initialize Op Names(name)") -71 put(name, "c3", "return from most recent unfinished call"); +71 put(name, "c3", "return from most recent unfinished call"); 72 73 :(scenario ret) 74 % Reg[ESP].u = 0x60; @@ -146,9 +146,9 @@ if ('onhashchange' in window) { 83 84 :(before "End Single-Byte Opcodes") 85 case 0xc3: { // return from a call -86 trace(90, "run") << "return" << end(); -87 EIP = pop(); -88 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); +86 trace(90, "run") << "return" << end(); +87 EIP = pop(); +88 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); 89 break; 90 } diff 
--git a/html/subx/020syscalls.cc.html b/html/subx/020syscalls.cc.html index c29ac354..c4d39b2e 100644 --- a/html/subx/020syscalls.cc.html +++ b/html/subx/020syscalls.cc.html @@ -22,6 +22,7 @@ a:hover { text-decoration: underline; } .LineNr { color: #444444; } .Identifier { color: #c0a020; } .Normal { color: #aaaaaa; background-color: #080808; padding-bottom: 1px; } +.PreProc { color: #800080; } .cSpecial { color: #008000; } --> @@ -57,38 +58,116 @@ if ('onhashchange' in window) {

- 1 :(before "End Initialize Op Names(name)")
- 2 put(name, "cd", "software interrupt");
- 3 
- 4 :(before "End Single-Byte Opcodes")
- 5 case 0xcd: {  // int imm8 (software interrupt)
- 6   trace(90, "run") << "syscall" << end();
- 7   uint8_t code = next();
- 8   if (code != 0x80) {
- 9     raise << "Unimplemented interrupt code " << HEXBYTE << code << '\n' << end();
-10     raise << "  Only `int 80h` supported for now.\n" << end();
-11     break;
-12   }
-13   process_int80();
-14   break;
-15 }
-16 
-17 :(code)
-18 void process_int80() {
-19   switch (Reg[EAX].u) {
-20   case 1:
-21     exit(/*exit code*/Reg[EBX].u);
-22     break;
-23   case 3:
-24     Reg[EAX].i = read(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
-25     break;
-26   case 4:
-27     Reg[EAX].i = write(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
-28     break;
-29   default:
-30     raise << HEXWORD << EIP << ": unimplemented syscall " << Reg[EAX].u << '\n' << end();
-31   }
-32 }
+  1 :(before "End Initialize Op Names(name)")
+  2 put(name, "cd", "software interrupt");
+  3 
+  4 :(before "End Single-Byte Opcodes")
+  5 case 0xcd: {  // int imm8 (software interrupt)
+  6   trace(90, "run") << "syscall" << end();
+  7   uint8_t code = next();
+  8   if (code != 0x80) {
+  9     raise << "Unimplemented interrupt code " << HEXBYTE << code << '\n' << end();
+ 10     raise << "  Only `int 80h` supported for now.\n" << end();
+ 11     break;
+ 12   }
+ 13   process_int80();
+ 14   break;
+ 15 }
+ 16 
+ 17 :(code)
+ 18 void process_int80() {
+ 19   switch (Reg[EAX].u) {
+ 20   case 1:
+ 21     exit(/*exit code*/Reg[EBX].u);
+ 22     break;
+ 23   case 3:
+ 24     trace(91, "run") << "read: " << Reg[EBX].u << ' ' << Reg[ECX].u << '/' << mem_addr_string(Reg[ECX].u) << ' ' << Reg[EDX].u << end();
+ 25     Reg[EAX].i = read(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
+ 26     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 27     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 28     break;
+ 29   case 4:
+ 30     trace(91, "run") << "write: " << Reg[EBX].u << ' ' << Reg[ECX].u << '/' << mem_addr_string(Reg[ECX].u) << ' ' << Reg[EDX].u << end();
+ 31     Reg[EAX].i = write(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
+ 32     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 33     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 34     break;
+ 35   case 5: {
+ 36     check_flags(ECX);
+ 37     check_mode(EDX);
+ 38     trace(91, "run") << "open: " << Reg[EBX].u << '/' << mem_addr_string(Reg[EBX].u) << ' ' << Reg[ECX].u << end();
+ 39     Reg[EAX].i = open(/*filename*/mem_addr_string(Reg[EBX].u), /*flags*/Reg[ECX].u, /*mode*/0640);
+ 40     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 41     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 42     break;
+ 43   }
+ 44   case 6:
+ 45     trace(91, "run") << "close: " << Reg[EBX].u << end();
+ 46     Reg[EAX].i = close(/*file descriptor*/Reg[EBX].u);
+ 47     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 48     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 49     break;
+ 50   case 8:
+ 51     check_mode(ECX);
+ 52     trace(91, "run") << "creat: " << Reg[EBX].u << '/' << mem_addr_string(Reg[EBX].u) << end();
+ 53     Reg[EAX].i = creat(/*filename*/mem_addr_string(Reg[EBX].u), /*mode*/0640);
+ 54     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 55     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 56     break;
+ 57   case 10:
+ 58     trace(91, "run") << "unlink: " << Reg[EBX].u << '/' << mem_addr_string(Reg[EBX].u) << end();
+ 59     Reg[EAX].i = unlink(/*filename*/mem_addr_string(Reg[EBX].u));
+ 60     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 61     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 62     break;
+ 63   case 38:
+ 64     trace(91, "run") << "rename: " << Reg[EBX].u << '/' << mem_addr_string(Reg[EBX].u) << " -> " << Reg[ECX].u << '/' << mem_addr_string(Reg[ECX].u) << end();
+ 65     Reg[EAX].i = rename(/*old filename*/mem_addr_string(Reg[EBX].u), /*new filename*/mem_addr_string(Reg[ECX].u));
+ 66     trace(91, "run") << "result: " << Reg[EAX].i << end();
+ 67     if (Reg[EAX].i == -1) raise << strerror(errno) << '\n' << end();
+ 68     break;
+ 69   case 45:  // brk: modify size of data segment
+ 70     trace(91, "run") << "grow data segment to " << Reg[EBX].u << end();
+ 71     resize_mem(/*new end address*/Reg[EBX].u);
+ 72     break;
+ 73   default:
+ 74     raise << HEXWORD << EIP << ": unimplemented syscall " << Reg[EAX].u << '\n' << end();
+ 75   }
+ 76 }
+ 77 
+ 78 // SubX is oblivious to file permissions, directories, symbolic links, terminals, and much else besides.
+ 79 // Also ignoring any concurrency considerations for now.
+ 80 void check_flags(int reg) {
+ 81   uint32_t flags = Reg[reg].u;
+ 82   if (flags != ((flags & O_RDONLY) | (flags & O_WRONLY))) {
+ 83     raise << HEXWORD << EIP << ": most POSIX flags to the open() syscall are not supported. Just O_RDONLY and O_WRONLY for now. Zero concurrent access support.\n" << end();
+ 84     exit(1);
+ 85   }
+ 86   if ((flags & O_RDONLY) && (flags & O_WRONLY)) {
+ 87     raise << HEXWORD << EIP << ": can't open a file for both reading and writing at once. See http://man7.org/linux/man-pages/man2/open.2.html.\n" << end();
+ 88     exit(1);
+ 89   }
+ 90 }
+ 91 
+ 92 void check_mode(int reg) {
+ 93   if (Reg[reg].u != 0600) {
+ 94     raise << HEXWORD << EIP << ": SubX is oblivious to file permissions; register " << reg << " must be 0.\n" << end();
+ 95     exit(1);
+ 96   }
+ 97 }
+ 98 
+ 99 void resize_mem(uint32_t new_end_address) {
+100   if (new_end_address < Mem_offset) {
+101     raise << HEXWORD << EIP << ": can't shrink data segment to before code segment\n";
+102     return;
+103   }
+104   int32_t new_size = new_end_address - Mem_offset;
+105   if (new_size < SIZE(Mem)) {
+106     raise << HEXWORD << EIP << ": shrinking data segment is not supported.\n" << end();
+107     return;
+108   }
+109   Mem.resize(new_size);  // will throw exception on failure
+110 }

diff --git a/html/subx/028translate.cc.html b/html/subx/028translate.cc.html new file mode 100644 index 00000000..0838b51a --- /dev/null +++ b/html/subx/028translate.cc.html @@ -0,0 +1,223 @@ + + + + +Mu - subx/028translate.cc + + + + + + + + + + +

+  1 //: The bedrock level 1 of abstraction is now done, and we're going to start
+  2 //: building levels above it that make programming in x86 machine code a
+  3 //: little more ergonomic.
+  4 //:
+  5 //: All levels will be "pass through by default". Whatever they don't
+  6 //: understand they will silently pass through to lower levels.
+  7 //:
+  8 //: Since raw hex bytes of machine code are always possible to inject, SubX is
+  9 //: not a language, and we aren't building a compiler. This is something
+ 10 //: deliberately leakier. Levels are more for improving auditing, checks and
+ 11 //: error messages rather than for hiding low-level details.
+ 12 
+ 13 //: Translator workflow: read 'source' file. Run a series of transforms on it,
+ 14 //: each passing through what it doesn't understand. The final program should
+ 15 //: be just machine code, suitable to write to an ELF binary.
+ 16 //:
+ 17 //: Higher levels usually transform code on the basis of metadata.
+ 18 
+ 19 :(before "End Main")
+ 20 if (is_equal(argv[1], "translate")) {
+ 21   START_TRACING_UNTIL_END_OF_SCOPE;
+ 22   assert(argc > 3);
+ 23   program p;
+ 24   ifstream fin(argv[2]);
+ 25   if (!fin) {
+ 26     cerr << "could not open " << argv[2] << '\n';
+ 27     return 1;
+ 28   }
+ 29   parse(fin, p);
+ 30   if (trace_contains_errors()) return 1;
+ 31   transform(p);
+ 32   if (trace_contains_errors()) return 1;
+ 33   save_elf(p, argv[3]);
+ 34   if (trace_contains_errors()) unlink(argv[3]);
+ 35   return 0;
+ 36 }
+ 37 
+ 38 :(code)
+ 39 // write out a program to a bare-bones ELF file
+ 40 void save_elf(const program& p, const char* filename) {
+ 41   ofstream out(filename, ios::binary);
+ 42   write_elf_header(out, p);
+ 43   for (size_t i = 0;  i < p.segments.size();  ++i)
+ 44     write_segment(p.segments.at(i), out);
+ 45   out.close();
+ 46 }
+ 47 
+ 48 void write_elf_header(ostream& out, const program& p) {  // write the ELF file header for 'p' to 'out', followed by one program header per segment
+ 49   char c = '\0';  // scratch byte that O() assigns and then writes
+ 50 #define O(X)  c = (X); out.write(&c, sizeof(c))
+ 51 // host is required to be little-endian
+ 52 #define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
+ 53   //// ehdr
+ 54   // e_ident
+ 55   O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
+ 56     O(0x1);  // 32-bit format
+ 57     O(0x1);  // little-endian
+ 58     O(0x1); O(0x0);  // ELF version 1, then a zero byte
+ 59   for (size_t i = 0;  i < 8;  ++i) { O(0x0); }  // 8 more zero bytes bring e_ident to 16 bytes in total
+ 60   // e_type
+ 61   O(0x02); O(0x00);  // 2 = ET_EXEC (executable file) in the ELF spec
+ 62   // e_machine
+ 63   O(0x03); O(0x00);  // 3 = EM_386 (Intel 80386) in the ELF spec
+ 64   // e_version
+ 65   O(0x01); O(0x00); O(0x00); O(0x00);
+ 66   // e_entry
+ 67   int e_entry = p.segments.at(0).start;  // convention
+ 68   emit(e_entry);
+ 69   // e_phoff -- immediately after ELF header
+ 70   int e_phoff = 0x34;
+ 71   emit(e_phoff);
+ 72   // e_shoff; unused
+ 73   int dummy32 = 0;  // emit() takes the address of its argument, so constants need a named variable
+ 74   emit(dummy32);
+ 75   // e_flags; unused
+ 76   emit(dummy32);
+ 77   // e_ehsize
+ 78   uint16_t e_ehsize = 0x34;
+ 79   emit(e_ehsize);
+ 80   // e_phentsize
+ 81   uint16_t e_phentsize = 0x20;
+ 82   emit(e_phentsize);
+ 83   // e_phnum
+ 84   uint16_t e_phnum = SIZE(p.segments);
+ 85   emit(e_phnum);
+ 86   // e_shentsize
+ 87   uint16_t dummy16 = 0x0;
+ 88   emit(dummy16);
+ 89   // e_shnum
+ 90   emit(dummy16);
+ 91   // e_shstrndx
+ 92   emit(dummy16);
+ 93 
+ 94   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;  // file offset of the first segment's contents: just past all the headers
+ 95   for (int i = 0;  i < SIZE(p.segments);  ++i) {
+ 96     //// phdr
+ 97     // p_type
+ 98     uint32_t p_type = 0x1;  // 1 = PT_LOAD (loadable segment) in the ELF spec
+ 99     emit(p_type);
+100     // p_offset
+101     emit(p_offset);
+102     // p_vaddr
+103     emit(p.segments.at(i).start);
+104     // p_paddr
+105     emit(p.segments.at(i).start);  // same value as p_vaddr
+106     // p_filesz
+107     uint32_t size = size_of(p.segments.at(i));
+108     assert(size < SEGMENT_SIZE);
+109     emit(size);
+110     // p_memsz
+111     emit(size);  // same value as p_filesz
+112     // p_flags
+113     uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6;  // convention: only first segment is code
+114     emit(p_flags);
+115 
+116     // p_align
+117     // "As the system creates or augments a process image, it logically copies
+118     // a file's segment to a virtual memory segment.  When—and if— the system
+119     // physically reads the file depends on the program's execution behavior,
+120     // system load, and so on.  A process does not require a physical page
+121     // unless it references the logical page during execution, and processes
+122     // commonly leave many pages unreferenced. Therefore delaying physical
+123     // reads frequently obviates them, improving system performance. To obtain
+124     // this efficiency in practice, executable and shared object files must
+125     // have segment images whose file offsets and virtual addresses are
+126     // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
+127     uint32_t p_align = 0x1000;  // default page size on linux
+128     emit(p_align);
+129     if (p_offset % p_align != p.segments.at(i).start % p_align) {  // enforce the congruence requirement quoted above
+130       raise << "segment starting at 0x" << HEXWORD << p.segments.at(i).start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p.segments.at(i).start % p_align) << '\n' << end();
+131       return;  // NOTE: 'out' is left holding a partially written header at this point
+132     }
+133 
+134     // prepare for next segment
+135     p_offset += size;  // the next segment's contents follow this one's directly in the file
+136   }
+137 #undef O
+138 #undef emit
+139 }
+140 
+141 void write_segment(const segment& s, ostream& out) {  // write each word of each line of 's' to 'out' as a single raw byte, in order
+142   for (int i = 0;  i < SIZE(s.lines);  ++i) {
+143     const vector<word>& w = s.lines.at(i).words;
+144     for (int j = 0;  j < SIZE(w);  ++j) {
+145       uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
+146       out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);  // exactly one byte per word
+147     }
+148   }
+149 }
+150 
+151 uint32_t size_of(const segment& s) {  // total number of words across all lines of 's'
+152   uint32_t sum = 0;
+153   for (int i = 0;  i < SIZE(s.lines);  ++i)
+154     sum += SIZE(s.lines.at(i).words);  // write_segment emits one byte per word, so this count is also the size in bytes
+155   return sum;
+156 }
+157 
+158 :(before "End Includes")
+159 using std::ios;
+

+ + + diff --git a/html/subx/029transforms.cc.html b/html/subx/029transforms.cc.html new file mode 100644 index 00000000..6f067d6f --- /dev/null +++ b/html/subx/029transforms.cc.html @@ -0,0 +1,124 @@ + + + + +Mu - subx/029transforms.cc + + + + + + + + + + +

+ 1 //: Ordering transforms is a well-known hard problem when building compilers.
+ 2 //: In our case we also have the additional notion of layers. The ordering of
+ 3 //: layers can have nothing in common with the ordering of transforms when
+ 4 //: SubX is tangled and run. This can be confusing for readers, particularly
+ 5 //: if later layers start inserting transforms at arbitrary points between
+ 6 //: transforms introduced earlier. Over time adding transforms can get harder
+ 7 //: and harder, having to meet the constraints of everything that's come
+ 8 //: before. It's worth thinking about organization up-front so the ordering is
+ 9 //: easy to hold in our heads, and it's obvious where to add a new transform.
+10 //: Some constraints:
+11 //:
+12 //:   1. Layers force us to build SubX bottom-up; since we want to be able to
+13 //:   build and run SubX after stopping loading at any layer, the overall
+14 //:   organization has to be to introduce primitives before we start using
+15 //:   them.
+16 //:
+17 //:   2. Transforms usually need to be run top-down, converting high-level
+18 //:   representations to low-level ones so that low-level layers can be
+19 //:   oblivious to them.
+20 //:
+21 //:   3. When running we'd often like new representations to be checked before
+22 //:   they are transformed away. The whole reason for new representations is
+23 //:   often to add new kinds of automatic checking for our machine code
+24 //:   programs.
+25 //:
+26 //: Putting these constraints together, we'll use the following broad
+27 //: organization:
+28 //:
+29 //:   a) We'll divide up our transforms into "levels", each level consisting
+30 //:   of multiple transforms, and dealing in some new set of representational
+31 //:   ideas. Levels will be added in reverse order to the one their transforms
+32 //:   will be run in.
+33 //:
+34 //:     To run all transforms:
+35 //:       Load transforms for level n
+36 //:       Load transforms for level n-1
+37 //:       ...
+38 //:       Load transforms for level 2
+39 //:       Run code at level 1
+40 //:
+41 //:   b) *Within* a level we'll usually introduce transforms in the order
+42 //:   they're run in.
+43 //:
+44 //:     To run transforms for level n:
+45 //:       Perform transform of layer l
+46 //:       Perform transform of layer l+1
+47 //:       ...
+48 //:
+49 //:   c) Within a level it's often most natural to introduce a new
+50 //:   representation by showing how it's transformed to the level below. To
+51 //:   make such exceptions more obvious, checks usually won't be first-class
+52 //:   transforms; instead code that keeps the program unmodified will run
+53 //:   within transforms before they mutate the program.
+54 //:
+55 //:     Level l transforms programs
+56 //:     Level l+1 inserts checks to run *before* the transform of level l runs
+57 //:
+58 //: This may all seem abstract, but will hopefully make sense over time. The
+59 //: goals are basically to always have a working program after any layer, to
+60 //: have the order of layers make narrative sense, and to order transforms
+61 //: correctly at runtime.
+62 
+63 :(before "End One-time Setup")
+64 // Begin Transforms
+65 // End Transforms
+

+ + + diff --git a/html/subx/029translate.cc.html b/html/subx/029translate.cc.html deleted file mode 100644 index 53e77fa0..00000000 --- a/html/subx/029translate.cc.html +++ /dev/null @@ -1,288 +0,0 @@ - - - - -Mu - subx/029translate.cc - - - - - - - - - - -

-  1 //: The bedrock level 1 of abstraction is now done, and we're going to start
-  2 //: building levels above it that make programming in x86 machine code a
-  3 //: little more ergonomic.
-  4 //:
-  5 //: All levels will be "pass through by default". Whatever they don't
-  6 //: understand they will silently pass through to lower levels.
-  7 //:
-  8 //: Since raw hex bytes of machine code are always possible to inject, SubX is
-  9 //: not a language, and we aren't building a compiler. This is something
- 10 //: deliberately leakier. Levels are more for improving auditing, checks and
- 11 //: error messages rather than for hiding low-level details.
- 12 
- 13 //: Translator workflow: read 'source' file. Run a series of transforms on it,
- 14 //: each passing through what it doesn't understand. The final program should
- 15 //: be just machine code, suitable to write to an ELF binary.
- 16 //:
- 17 //: Higher levels usually transform code on the basis of metadata.
- 18 
- 19 :(before "End Main")
- 20 if (is_equal(argv[1], "translate")) {
- 21   START_TRACING_UNTIL_END_OF_SCOPE;
- 22   assert(argc > 3);
- 23   program p;
- 24   ifstream fin(argv[2]);
- 25   if (!fin) {
- 26     cerr << "could not open " << argv[2] << '\n';
- 27     return 1;
- 28   }
- 29   parse(fin, p);
- 30   if (trace_contains_errors()) return 1;
- 31   transform(p);
- 32   if (trace_contains_errors()) return 1;
- 33   save_elf(p, argv[3]);
- 34   if (trace_contains_errors()) unlink(argv[3]);
- 35   return 0;
- 36 }
- 37 
- 38 //: Ordering transforms is a well-known hard problem when building compilers.
- 39 //: In our case we also have the additional notion of layers. The ordering of
- 40 //: layers can have nothing in common with the ordering of transforms when
- 41 //: SubX is tangled and run. This can be confusing for readers, particularly
- 42 //: if later layers start inserting transforms at arbitrary points between
- 43 //: transforms introduced earlier. Over time adding transforms can get harder
- 44 //: and harder, having to meet the constraints of everything that's come
- 45 //: before. It's worth thinking about organization up-front so the ordering is
- 46 //: easy to hold in our heads, and it's obvious where to add a new transform.
- 47 //: Some constraints:
- 48 //:
- 49 //:   1. Layers force us to build SubX bottom-up; since we want to be able to
- 50 //:   build and run SubX after stopping loading at any layer, the overall
- 51 //:   organization has to be to introduce primitives before we start using
- 52 //:   them.
- 53 //:
- 54 //:   2. Transforms usually need to be run top-down, converting high-level
- 55 //:   representations to low-level ones so that low-level layers can be
- 56 //:   oblivious to them.
- 57 //:
- 58 //:   3. When running we'd often like new representations to be checked before
- 59 //:   they are transformed away. The whole reason for new representations is
- 60 //:   often to add new kinds of automatic checking for our machine code
- 61 //:   programs.
- 62 //:
- 63 //: Putting these constraints together, we'll use the following broad
- 64 //: organization:
- 65 //:
- 66 //:   a) We'll divide up our transforms into "levels", each level consisting
- 67 //:   of multiple transforms, and dealing in some new set of representational
- 68 //:   ideas. Levels will be added in reverse order to the one their transforms
- 69 //:   will be run in.
- 70 //:
- 71 //:     To run all transforms:
- 72 //:       Load transforms for level n
- 73 //:       Load transforms for level n-1
- 74 //:       ...
- 75 //:       Load transforms for level 2
- 76 //:       Run code at level 1
- 77 //:
- 78 //:   b) *Within* a level we'll usually introduce transforms in the order
- 79 //:   they're run in.
- 80 //:
- 81 //:     To run transforms for level n:
- 82 //:       Perform transform of layer l
- 83 //:       Perform transform of layer l+1
- 84 //:       ...
- 85 //:
- 86 //:   c) Within a level it's often most natural to introduce a new
- 87 //:   representation by showing how it's transformed to the level below. To
- 88 //:   make such exceptions more obvious checks usually won't be first-class
- 89 //:   transforms; instead code that keeps the program unmodified will run
- 90 //:   within transforms before they mutate the program.
- 91 //:
- 92 //:     Level l transforms programs
- 93 //:     Level l+1 inserts checks to run *before* the transform of level l runs
- 94 //:
- 95 //: This may all seem abstract, but will hopefully make sense over time. The
- 96 //: goals are basically to always have a working program after any layer, to
- 97 //: have the order of layers make narrative sense, and to order transforms
- 98 //: correctly at runtime.
- 99 :(before "End One-time Setup")
-100 // Begin Transforms
-101 // End Transforms
-102 
-103 :(code)
-104 // write out a program to a bare-bones ELF file
-105 void save_elf(const program& p, const char* filename) {
-106   ofstream out(filename, ios::binary);
-107   write_elf_header(out, p);
-108   for (size_t i = 0;  i < p.segments.size();  ++i)
-109     write_segment(p.segments.at(i), out);
-110   out.close();
-111 }
-112 
-113 void write_elf_header(ostream& out, const program& p) {
-114   char c = '\0';
-115 #define O(X)  c = (X); out.write(&c, sizeof(c))
-116 // host is required to be little-endian
-117 #define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
-118   //// ehdr
-119   // e_ident
-120   O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
-121     O(0x1);  // 32-bit format
-122     O(0x1);  // little-endian
-123     O(0x1); O(0x0);
-124   for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
-125   // e_type
-126   O(0x02); O(0x00);
-127   // e_machine
-128   O(0x03); O(0x00);
-129   // e_version
-130   O(0x01); O(0x00); O(0x00); O(0x00);
-131   // e_entry
-132   int e_entry = p.segments.at(0).start;  // convention
-133   emit(e_entry);
-134   // e_phoff -- immediately after ELF header
-135   int e_phoff = 0x34;
-136   emit(e_phoff);
-137   // e_shoff; unused
-138   int dummy32 = 0;
-139   emit(dummy32);
-140   // e_flags; unused
-141   emit(dummy32);
-142   // e_ehsize
-143   uint16_t e_ehsize = 0x34;
-144   emit(e_ehsize);
-145   // e_phentsize
-146   uint16_t e_phentsize = 0x20;
-147   emit(e_phentsize);
-148   // e_phnum
-149   uint16_t e_phnum = SIZE(p.segments);
-150   emit(e_phnum);
-151   // e_shentsize
-152   uint16_t dummy16 = 0x0;
-153   emit(dummy16);
-154   // e_shnum
-155   emit(dummy16);
-156   // e_shstrndx
-157   emit(dummy16);
-158 
-159   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
-160   for (int i = 0;  i < SIZE(p.segments);  ++i) {
-161     //// phdr
-162     // p_type
-163     uint32_t p_type = 0x1;
-164     emit(p_type);
-165     // p_offset
-166     emit(p_offset);
-167     // p_vaddr
-168     emit(p.segments.at(i).start);
-169     // p_paddr
-170     emit(p.segments.at(i).start);
-171     // p_filesz
-172     uint32_t size = size_of(p.segments.at(i));
-173     assert(size < SEGMENT_SIZE);
-174     emit(size);
-175     // p_memsz
-176     emit(size);
-177     // p_flags
-178     uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6;  // convention: only first segment is code
-179     emit(p_flags);
-180 
-181     // p_align
-182     // "As the system creates or augments a process image, it logically copies
-183     // a file's segment to a virtual memory segment.  When—and if— the system
-184     // physically reads the file depends on the program's execution behavior,
-185     // system load, and so on.  A process does not require a physical page
-186     // unless it references the logical page during execution, and processes
-187     // commonly leave many pages unreferenced. Therefore delaying physical
-188     // reads frequently obviates them, improving system performance. To obtain
-189     // this efficiency in practice, executable and shared object files must
-190     // have segment images whose file offsets and virtual addresses are
-191     // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
-192     uint32_t p_align = 0x1000;  // default page size on linux
-193     emit(p_align);
-194     if (p_offset % p_align != p.segments.at(i).start % p_align) {
-195       raise << "segment starting at 0x" << HEXWORD << p.segments.at(i).start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p.segments.at(i).start % p_align) << '\n' << end();
-196       return;
-197     }
-198 
-199     // prepare for next segment
-200     p_offset += size;
-201   }
-202 #undef O
-203 #undef emit
-204 }
-205 
-206 void write_segment(const segment& s, ostream& out) {
-207   for (int i = 0;  i < SIZE(s.lines);  ++i) {
-208     const vector<word>& w = s.lines.at(i).words;
-209     for (int j = 0;  j < SIZE(w);  ++j) {
-210       uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
-211       out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
-212     }
-213   }
-214 }
-215 
-216 uint32_t size_of(const segment& s) {
-217   uint32_t sum = 0;
-218   for (int i = 0;  i < SIZE(s.lines);  ++i)
-219     sum += SIZE(s.lines.at(i).words);
-220   return sum;
-221 }
-222 
-223 :(before "End Includes")
-224 using std::ios;
-

- - - diff --git a/html/subx/030---operands.cc.html b/html/subx/030---operands.cc.html new file mode 100644 index 00000000..ea38d64b --- /dev/null +++ b/html/subx/030---operands.cc.html @@ -0,0 +1,536 @@ + + + + +Mu - subx/030---operands.cc + + + + + + + + + + +

+  1 //: Beginning of "level 2": tagging bytes with metadata around what field of
+  2 //: an x86 instruction they're for.
+  3 //:
+  4 //: The x86 instruction set is variable-length, and how a byte is interpreted
+  5 //: affects later instruction boundaries. A lot of the pain in programming
+  6 //: machine code stems from computer and programmer going out of sync on what
+  7 //: a byte means. The miscommunication is usually not immediately caught, and
+  8 //: metastasizes at runtime into kilobytes of misinterpreted instructions.
+  9 //:
+ 10 //: To mitigate these issues, we'll start programming in terms of logical
+ 11 //: operands rather than physical bytes. Some operands are smaller than a
+ 12 //: byte, and others may consist of multiple bytes. This layer will correctly
+ 13 //: pack and order the bytes corresponding to the operands in an instruction.
+ 14 
+ 15 :(before "End Help Texts")
+ 16 put(Help, "instructions",  // store the help page text under the key "instructions"
+ 17   "Each x86 instruction consists of an instruction or opcode and some number\n"
+ 18   "of operands.\n"
+ 19   "Each operand has a type. An instruction won't have more than one operand of\n"
+ 20   "any type.\n"
+ 21   "Each instruction has some set of allowed operand types. It'll reject others.\n"
+ 22   "The complete list of operand types: mod, subop, r32 (register), rm32\n"
+ 23   "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
+ 24   "imm32.\n"
+ 25   "Each of these has its own help page. Try reading 'subx help mod' next.\n"
+ 26 );
+ 27 :(before "End Help Contents")
+ 28 cerr << "  instructions\n";  // presumably lists this page in the help table of contents -- TODO confirm against the help layer
+ 29 
+ 30 :(scenario pack_immediate_constants)
+ 31 == 0x1
+ 32 # instruction                     effective address                                                   operand     displacement    immediate
+ 33 # op          subop               mod             rm32          base        index         scale       r32
+ 34 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+ 35   bb                                                                                                                              0x2a/imm32        # copy 42 to EBX
+ 36 +transform: packing instruction 'bb 0x2a/imm32'
+ 37 +transform: instruction after packing: 'bb 2a 00 00 00'
+ 38 +run: copy imm32 0x0000002a to EBX
+ 39 
+ 40 //: complete set of valid operand types
+ 41 
+ 42 :(before "End Globals")
+ 43 set<string> Instruction_operands;  // names of every operand type; word metadata is compared against this set
+ 44 :(before "End One-time Setup")
+ 45 Instruction_operands.insert("subop");
+ 46 Instruction_operands.insert("mod");
+ 47 Instruction_operands.insert("rm32");
+ 48 Instruction_operands.insert("base");
+ 49 Instruction_operands.insert("index");
+ 50 Instruction_operands.insert("scale");
+ 51 Instruction_operands.insert("r32");
+ 52 Instruction_operands.insert("disp8");
+ 53 Instruction_operands.insert("disp16");
+ 54 Instruction_operands.insert("disp32");
+ 55 Instruction_operands.insert("imm8");
+ 56 Instruction_operands.insert("imm32");
+ 57 
+ 58 :(before "End Help Texts")
+ 59 init_operand_type_help();  // add the per-operand-type help pages defined below
+ 60 :(code)
+ 61 void init_operand_type_help() {  // store one help page in Help per operand type: mod, subop, r32, rm32, base, index, scale, disp8, disp16, disp32, imm8, imm32
+ 62   put(Help, "mod",
+ 63     "2-bit operand controlling the _addressing mode_ of many instructions,\n"
+ 64     "to determine how to compute the _effective address_ to look up memory at\n"
+ 65     "based on the 'rm32' operand and potentially others.\n"
+ 66     "\n"
+ 67     "If mod = 3, just operate on the contents of the register specified by rm32\n"
+ 68     "            (direct mode).\n"
+ 69     "If mod = 2, effective address is usually* rm32 + disp32\n"
+ 70     "            (indirect mode with displacement).\n"
+ 71     "If mod = 1, effective address is usually* rm32 + disp8\n"
+ 72     "            (indirect mode with displacement).\n"
+ 73     "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
+ 74     "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
+ 75     "     Using it as an address gets more involved. For more details,\n"
+ 76     "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
+ 77     "\n"
+ 78     "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
+ 79     "\"32-bit addressing forms with the ModR/M byte\".\n"
+ 80     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+ 81   );
+ 82   put(Help, "subop",
+ 83     "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
+ 84     "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
+ 85   );
+ 86   put(Help, "r32",
+ 87     "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
+ 88   );
+ 89   put(Help, "rm32",
+ 90     "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
+ 91     "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
+ 92     "\"32-bit addressing forms with the ModR/M byte\".\n"
+ 93     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+ 94   );
+ 95   put(Help, "base",
+ 96     "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
+ 97     "This address may be further modified by 'index' and 'scale' operands.\n"
+ 98     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
+ 99     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
+100     "\"32-bit addressing forms with the SIB byte\".\n"
+101     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+102   );
+103   put(Help, "index",
+104     "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
+105     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
+106     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
+107     "\"32-bit addressing forms with the SIB byte\".\n"
+108     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+109   );
+110   put(Help, "scale",
+111     "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
+112     "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
+113     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
+114     "\"32-bit addressing forms with the SIB byte\".\n"
+115     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+116   );
+117   put(Help, "disp8",
+118     "8-bit value to be added in many instructions.\n"
+119   );
+120   put(Help, "disp16",
+121     "16-bit value to be added in many instructions.\n"
+122   );
+123   put(Help, "disp32",
+124     "32-bit value to be added in many instructions.\n"
+125   );
+126   put(Help, "imm8",
+127     "8-bit value for many instructions.\n"
+128   );
+129   put(Help, "imm32",
+130     "32-bit value for many instructions.\n"
+131   );
+132 }
+133 
+134 //:: transform packing operands into bytes in the right order
+135 
+136 :(before "End Transforms")
+137 // Begin Level-2 Transforms
+138 Transform.push_back(pack_operands);  // append operand packing to the list of transforms
+139 // End Level-2 Transforms
+140 
+141 :(code)
+142 void pack_operands(program& p) {  // transform: pack the operands of every instruction in the first segment of 'p' into bytes
+143   if (p.segments.empty()) return;  // nothing to pack
+144   segment& code = p.segments.at(0);  // only the first segment is processed; later segments are left alone
+145   // Pack Operands(segment code)
+146   trace(99, "transform") << "-- pack operands" << end();
+147   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+148     line& inst = code.lines.at(i);
+149     if (all_hex_bytes(inst)) continue;  // already plain bytes without operand metadata; pass through unchanged
+150     trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
+151     pack_operands(inst);  // rewrites inst.words in place
+152     trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
+153   }
+154 }
+155 
+156 void pack_operands(line& inst) {  // rebuild the words of one instruction in a fixed order, regardless of the order operands were written in
+157   line new_inst;
+158   add_opcodes(inst, new_inst);  // 1. opcode word(s)
+159   add_modrm_byte(inst, new_inst);  // 2. byte built from mod, r32/subop and rm32, if any is present
+160   add_sib_byte(inst, new_inst);  // 3. byte built from scale, index and base, if any is present
+161   add_disp_bytes(inst, new_inst);  // 4. displacement
+162   add_imm_bytes(inst, new_inst);  // 5. immediate
+163   inst.words.swap(new_inst.words);  // only the words are replaced; other fields of 'inst' are untouched
+164 }
+165 
+166 void add_opcodes(const line& in, line& out) {  // copy the leading opcode words of 'in' to 'out': one, two or three of them
+167   out.words.push_back(in.words.at(0));  // the first word is always treated as an opcode
+168   if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
+169     out.words.push_back(in.words.at(1));  // 0f, f2 and f3 are followed by a second opcode word; .at() throws if it is missing
+170   if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
+171     out.words.push_back(in.words.at(2));  // 'f3 0f' is followed by a third opcode word
+172   if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
+173     out.words.push_back(in.words.at(2));  // 'f2 0f' likewise
+174 }
+175 
+176 void add_modrm_byte(const line& in, line& out) {  // append one byte combining mod, r32/subop and rm32, if 'in' has any of those operands
+177   uint8_t mod=0, reg_subop=0, rm32=0;  // operands that are absent contribute 0
+178   bool emit = false;  // set once any of the four operand types is seen
+179   for (int i = 0;  i < SIZE(in.words);  ++i) {
+180     const word& curr = in.words.at(i);
+181     if (has_metadata(curr, "mod")) {
+182       mod = hex_byte(curr.data);
+183       emit = true;
+184     }
+185     else if (has_metadata(curr, "rm32")) {
+186       rm32 = hex_byte(curr.data);
+187       emit = true;
+188     }
+189     else if (has_metadata(curr, "r32")) {
+190       reg_subop = hex_byte(curr.data);  // r32 and subop share the same field
+191       emit = true;
+192     }
+193     else if (has_metadata(curr, "subop")) {
+194       reg_subop = hex_byte(curr.data);
+195       emit = true;
+196     }
+197   }
+198   if (emit)
+199     out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));  // layout: mod in bits 7-6, r32/subop in bits 5-3, rm32 in bits 2-0; values are not masked here
+200 }
+201 
+202 void add_sib_byte(const line& in, line& out) {  // append one byte combining scale, index and base, if 'in' has any of those operands
+203   uint8_t scale=0, index=0, base=0;  // operands that are absent contribute 0
+204   bool emit = false;  // set once any of the three operand types is seen
+205   for (int i = 0;  i < SIZE(in.words);  ++i) {
+206     const word& curr = in.words.at(i);
+207     if (has_metadata(curr, "scale")) {
+208       scale = hex_byte(curr.data);
+209       emit = true;
+210     }
+211     else if (has_metadata(curr, "index")) {
+212       index = hex_byte(curr.data);
+213       emit = true;
+214     }
+215     else if (has_metadata(curr, "base")) {
+216       base = hex_byte(curr.data);
+217       emit = true;
+218     }
+219   }
+220   if (emit)
+221     out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));  // layout: scale in bits 7-6, index in bits 5-3, base in bits 2-0; values are not masked here
+222 }
+223 
+224 void add_disp_bytes(const line& in, line& out) {  // append the displacement operands of 'in' to 'out', in the order they appear
+225   for (int i = 0;  i < SIZE(in.words);  ++i) {
+226     const word& curr = in.words.at(i);
+227     if (has_metadata(curr, "disp8"))
+228       emit_hex_bytes(out, curr, 1);  // 1 byte
+229     else if (has_metadata(curr, "disp16"))  // the three sizes are mutually exclusive, so chain them like add_imm_bytes does
+230       emit_hex_bytes(out, curr, 2);  // 2 bytes
+231     else if (has_metadata(curr, "disp32"))
+232       emit_hex_bytes(out, curr, 4);  // 4 bytes
+233   }
+234 }
+235 
+236 void add_imm_bytes(const line& in, line& out) {  // append the immediate operands of 'in' to 'out', in the order they appear
+237   for (int i = 0;  i < SIZE(in.words);  ++i) {
+238     const word& curr = in.words.at(i);
+239     if (has_metadata(curr, "imm8"))
+240       emit_hex_bytes(out, curr, 1);  // 1 byte
+241     else if (has_metadata(curr, "imm32"))
+242       emit_hex_bytes(out, curr, 4);  // 4 bytes
+243   }
+244 }
+245 
+246 void emit_hex_bytes(line& out, const word& w, int num) {  // append operand 'w' to 'out' as 'num' byte-sized words
+247   assert(num <= 4);
+248   if (num == 1 || !is_hex_int(w.data)) {  // single bytes and non-numeric words stay one word, so their metadata survives
+249     out.words.push_back(w);
+250     if (is_hex_int(w.data))
+251       out.words.back().data = hex_byte_to_string(parse_int(w.data));  // normalize the number to hex-byte text; only the low 8 bits are kept
+252     return;
+253   }
+254   emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);  // multi-byte number: split into bytes (metadata of 'w' is not carried over)
+255 }
+256 
+257 void emit_hex_bytes(line& out, uint32_t val, int num) {  // append the low 'num' bytes of 'val' to 'out', least significant byte first
+258   assert(num <= 4);
+259   for (int i = 0;  i < num;  ++i) {
+260     out.words.push_back(hex_byte_text(val & 0xff));  // lowest remaining byte
+261     val = val >> 8;  // move on to the next byte
+262   }
+263 }
+264 
+265 word hex_byte_text(uint8_t val) {  // build a word for a byte generated by this layer rather than written by the programmer
+266   word result;
+267   result.data = hex_byte_to_string(val);
+268   result.original = result.data+"/auto";  // '/auto' marks the word as generated
+269   return result;
+270 }
+271 
+272 string hex_byte_to_string(uint8_t val) {  // format 'val' as text via HEXBYTE (two hex digits in the scenarios of this file, e.g. '02', 'ff')
+273   ostringstream out;
+274   out << HEXBYTE << NUM(val);  // presumably NUM makes the byte print as a number rather than a character -- TODO confirm
+275   return out.str();
+276 }
+277 
+278 string to_string(const vector<word>& in) {  // join the 'data' of each word with single spaces; metadata is not printed
+279   ostringstream out;
+280   for (int i = 0;  i < SIZE(in);  ++i) {
+281     if (i > 0) out << ' ';  // separator between words, none before the first
+282     out << in.at(i).data;
+283   }
+284   return out.str();
+285 }
+286 
+287 :(before "End Unit Tests")
+288 void test_preserve_metadata_when_emitting_single_byte() {  // a 1-byte operand must be copied whole, keeping its 'original' text
+289   word in;
+290   in.data = "f0";
+291   in.original = "f0/foo";
+292   line out;
+293   emit_hex_bytes(out, in, 1);  // num == 1 takes the pass-through path
+294   CHECK_EQ(out.words.at(0).data, "f0");
+295   CHECK_EQ(out.words.at(0).original, "f0/foo");  // not replaced by a generated '/auto' word
+296 }
+297 
+298 :(scenario pack_disp8)
+299 == 0x1
+300 74 2/disp8  # jump 2 bytes away if ZF is set
+301 +transform: packing instruction '74 2/disp8'
+302 +transform: instruction after packing: '74 02'
+303 
+304 :(scenarios transform)
+305 :(scenario pack_disp8_negative)
+306 == 0x1
+307 # running this will cause an infinite loop
+308 74 -1/disp8  # jump 1 byte before if ZF is set
+309 +transform: packing instruction '74 -1/disp8'
+310 +transform: instruction after packing: '74 ff'
+311 :(scenarios run)
+312 
+313 //: helper for scenario
+314 :(code)
+315 void transform(const string& text_bytes) {  // parse 'text_bytes' as a program and run the transforms on it
+316   program p;
+317   istringstream in(text_bytes);
+318   parse(in, p);
+319   if (trace_contains_errors()) return;  // don't transform a program that failed to parse
+320   transform(p);
+321 }
+322 
+323 :(scenario pack_modrm_imm32)
+324 == 0x1
+325 # instruction                     effective address                                                   operand     displacement    immediate
+326 # op          subop               mod             rm32          base        index         scale       r32
+327 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+328   81          0/add/subop         3/mod/direct    3/ebx/rm32                                                                      1/imm32           # add 1 to EBX
+329 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
+330 +transform: instruction after packing: '81 c3 01 00 00 00'
+331 
+332 :(scenario pack_imm32_large)
+333 == 0x1
+334 b9 0x080490a7/imm32  # copy to ECX
+335 +transform: packing instruction 'b9 0x080490a7/imm32'
+336 +transform: instruction after packing: 'b9 a7 90 04 08'
+337 
+338 :(scenario pack_immediate_constants_hex)
+339 == 0x1
+340 # instruction                     effective address                                                   operand     displacement    immediate
+341 # op          subop               mod             rm32          base        index         scale       r32
+342 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+343   bb                                                                                                                              0x2a/imm32        # copy 42 to EBX
+344 +transform: packing instruction 'bb 0x2a/imm32'
+345 +transform: instruction after packing: 'bb 2a 00 00 00'
+346 +run: copy imm32 0x0000002a to EBX
+347 
+348 :(scenarios transform)
+349 :(scenario pack_silently_ignores_non_hex)
+350 == 0x1
+351 # instruction                     effective address                                                   operand     displacement    immediate
+352 # op          subop               mod             rm32          base        index         scale       r32
+353 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+354   bb                                                                                                                              foo/imm32         # copy foo to EBX
+355 +transform: packing instruction 'bb foo/imm32'
+356 # no change (we're just not printing metadata to the trace)
+357 +transform: instruction after packing: 'bb foo'
+358 $error: 0
+359 :(scenarios run)
+360 
+361 //:: helpers
+362 
+363 :(code)
+364 bool all_hex_bytes(const line& inst) {
+365   for (int i = 0;  i < SIZE(inst.words);  ++i)
+366     if (!is_hex_byte(inst.words.at(i)))
+367       return false;
+368   return true;
+369 }
+370 
+371 bool is_hex_byte(const word& curr) {
+372   if (contains_any_operand_metadata(curr))
+373     return false;
+374   if (SIZE(curr.data) != 2)
+375     return false;
+376   if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
+377     return false;
+378   return true;
+379 }
+380 
+381 bool contains_any_operand_metadata(const word& word) {
+382   for (int i = 0;  i < SIZE(word.metadata);  ++i)
+383     if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
+384       return true;
+385   return false;
+386 }
+387 
+388 bool has_metadata(const line& inst, const string& m) {
+389   bool result = false;
+390   for (int i = 0;  i < SIZE(inst.words);  ++i) {
+391     if (!has_metadata(inst.words.at(i), m)) continue;
+392     if (result) {
+393       raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
+394       return false;
+395     }
+396     result = true;
+397   }
+398   return result;
+399 }
+400 
+401 bool has_metadata(const word& w, const string& m) {
+402   bool result = false;
+403   bool metadata_found = false;
+404   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
+405     const string& curr = w.metadata.at(i);
+406     if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;  // ignore unrecognized metadata
+407     if (metadata_found) {
+408       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
+409       return false;
+410     }
+411     metadata_found = true;
+412     result = (curr == m);
+413   }
+414   return result;
+415 }
+416 
+417 word metadata(const line& inst, const string& m) {
+418   for (int i = 0;  i < SIZE(inst.words);  ++i)
+419     if (has_metadata(inst.words.at(i), m))
+420       return inst.words.at(i);
+421   assert(false);
+422 }
+423 
// Does 's' look like a hex integer: optional sign, optional "0x" prefix, then
// one or more hex digits (either case)?
bool is_hex_int(const string& s) {
  if (s.empty()) return false;
  size_t pos = 0;
  if (s.at(0) == '-' || s.at(0) == '+') pos++;
  // Compare exactly two characters; the second argument is a length, not an
  // end position, so this also works after a sign (e.g. "-0x1f").
  if (s.compare(pos, 2, "0x") == 0) pos += 2;
  if (pos == s.size()) return false;  // a bare sign or bare "0x" has no digits
  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
}
+431 
+432 int32_t parse_int(const string& s) {
+433   if (s.empty()) return 0;
+434   istringstream in(s);
+435   in >> std::hex;
+436   if (s.at(0) == '-') {
+437     int32_t result = 0;
+438     in >> result;
+439     if (!in || !in.eof()) {
+440       raise << "not a number: " << s << '\n' << end();
+441       return 0;
+442     }
+443     return result;
+444   }
+445   uint32_t uresult = 0;
+446   in >> uresult;
+447   if (!in || !in.eof()) {
+448     raise << "not a number: " << s << '\n' << end();
+449     return 0;
+450   }
+451   return static_cast<int32_t>(uresult);
+452 }
+453 :(before "End Unit Tests")
+454 void test_parse_int() {
+455   CHECK_EQ(0, parse_int("0"));
+456   CHECK_EQ(0, parse_int("0x0"));
+457   CHECK_EQ(0, parse_int("0x0"));
+458   CHECK_EQ(16, parse_int("10"));  // hex always
+459   CHECK_EQ(-1, parse_int("-1"));
+460   CHECK_EQ(-1, parse_int("0xffffffff"));
+461 }
+462 
+463 :(code)
+464 string to_string(const line& inst) {
+465   ostringstream out;
+466   for (int i = 0;  i < SIZE(inst.words);  ++i) {
+467     if (i > 0) out << ' ';
+468     out << inst.words.at(i).original;
+469   }
+470   return out.str();
+471 }
+

+ + + diff --git a/html/subx/030check_operands.cc.html b/html/subx/030check_operands.cc.html deleted file mode 100644 index cf8ca1a8..00000000 --- a/html/subx/030check_operands.cc.html +++ /dev/null @@ -1,801 +0,0 @@ - - - - -Mu - subx/030check_operands.cc - - - - - - - - - - -

-  1 //: Beginning of "level 2": tagging bytes with metadata around what field of
-  2 //: an x86 instruction they're for.
-  3 //:
-  4 //: The x86 instruction set is variable-length, and how a byte is interpreted
-  5 //: affects later instruction boundaries. A lot of the pain in programming machine code
-  6 //: stems from computer and programmer going out of sync on what a byte
-  7 //: means. The miscommunication is usually not immediately caught, and
-  8 //: metastasizes at runtime into kilobytes of misinterpreted instructions.
-  9 //: Tagging bytes with what the programmer expects them to be interpreted as
- 10 //: helps the computer catch miscommunication immediately.
- 11 //:
- 12 //: This is one way SubX is going to be different from a 'language': we
- 13 //: typically think of languages as less verbose than machine code. Here we're
- 14 //: making machine code *more* verbose.
- 15 //:
- 16 //: ---
- 17 //:
- 18 //: While we're here, we'll also improve a couple of other things in level 2:
- 19 //:
- 20 //: a) Machine code often packs logically separate operands into bitfields of
- 21 //: a single byte. In a later layer (pack_operands) we'll start writing out
- 22 //: each operand separately, and the translator will construct the right bytes
- 23 //: out of operands.
- 24 //:
- 25 //: SubX now gets still more verbose. What used to be a single byte, say 'c3',
- 26 //: can now expand to '3/mod 0/subop 3/rm32'.
- 27 //:
- 28 //: b) Since each operand is tagged, we can loosen ordering restrictions and
- 29 //: allow writing out the operands in any order, like keyword arguments.
- 30 //:
- 31 //: The actual opcodes (first 1-3 bytes of each instruction) will continue to
- 32 //: be at the start of each line. The x86 instruction set is a mess, and
- 33 //: opcodes often don't admit good names.
- 34 
- 35 :(before "End Help Texts")
- 36 put(Help, "instructions",
- 37   "Each x86 instruction consists of an instruction or opcode and some number\n"
- 38   "of operands.\n"
- 39   "Each operand has a type. An instruction won't have more than one operand of\n"
- 40   "any type.\n"
- 41   "Each instruction has some set of allowed operand types. It'll reject others.\n"
- 42   "The complete list of operand types: mod, subop, r32 (register), rm32\n"
- 43   "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
- 44   "imm32.\n"
- 45   "Each of these has its own help page. Try reading 'subx help mod' next.\n"
- 46 );
- 47 :(before "End Help Contents")
- 48 cerr << "  instructions\n";
- 49 
- 50 //:: Check for 'syntax errors'; missing or unexpected operands.
- 51 
- 52 :(scenario check_missing_imm8_operand)
- 53 % Hide_errors = true;
- 54 == 0x1
- 55 # instruction                     effective address                                                   operand     displacement    immediate
- 56 # op          subop               mod             rm32          base        index         scale       r32
- 57 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
- 58   cd                                                                                                                                                # int ??
- 59 +error: 'cd' (software interrupt): missing imm8 operand
- 60 
- 61 :(before "End Transforms")
- 62 Transform.push_back(check_operands);
- 63 
- 64 :(code)
- 65 void check_operands(/*const*/ program& p) {
- 66   trace(99, "transform") << "-- check operands" << end();
- 67   if (p.segments.empty()) return;
- 68   const segment& code = p.segments.at(0);
- 69   for (int i = 0;  i < SIZE(code.lines);  ++i) {
- 70     check_operands(code.lines.at(i));
- 71     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
- 72   }
- 73 }
- 74 
- 75 void check_operands(const line& inst) {
- 76   word op = preprocess_op(inst.words.at(0));
- 77   if (op.data == "0f") {
- 78     check_operands_0f(inst);
- 79     return;
- 80   }
- 81   if (op.data == "f3") {
- 82     check_operands_f3(inst);
- 83     return;
- 84   }
- 85   check_operands(inst, op);
- 86 }
- 87 
- 88 word preprocess_op(word/*copy*/ op) {
- 89   op.data = tolower(op.data.c_str());
- 90   if (starts_with(op.data, "0x"))
- 91     op.data = op.data.substr(2);
- 92   return op;
- 93 }
- 94 
- 95 //: To check the operands for an opcode, we'll track the permitted operands
- 96 //: for each supported opcode in a bitvector. That way we can often compute the
- 97 //: bitvector for each instruction's operands and compare it with the expected.
- 98 
- 99 :(before "End Types")
// Each enumerator is a bit position in an operand bitvector (bit 0 is the
// least significant); the mask for a type is (1 << type).
enum operand_type {
  MODRM = 0,   // mask 0x01; more complex, may also involve disp8 or disp32
  SUBOP = 1,   // mask 0x02
  DISP8 = 2,   // mask 0x04
  DISP16 = 3,  // mask 0x08
  DISP32 = 4,  // mask 0x10
  IMM8 = 5,    // mask 0x20
  IMM32 = 6,   // mask 0x40
  NUM_OPERAND_TYPES = 7  // count of the types above; must stay last
};
-111 :(before "End Globals")
-112 vector<string> Operand_type_name;
-113 map<string, operand_type> Operand_type;
-114 :(before "End One-time Setup")
-115 init_op_types();
-116 :(code)
-117 void init_op_types() {
-118   assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8);
-119   Operand_type_name.resize(NUM_OPERAND_TYPES);
-120   #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type);
-121   DEF(MODRM);
-122   DEF(SUBOP);
-123   DEF(DISP8);
-124   DEF(DISP16);
-125   DEF(DISP32);
-126   DEF(IMM8);
-127   DEF(IMM32);
-128   #undef DEF
-129 }
-130 
-131 :(before "End Globals")
-132 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands;
-133 const uint8_t INVALID_OPERANDS = 0xff;  // no instruction uses all the operand types
-134 :(before "End One-time Setup")
-135 init_permitted_operands();
-136 :(code)
-137 void init_permitted_operands() {
-138   //// Class A: just op, no operands
-139   // halt
-140   put(Permitted_operands, "f4", 0x00);
-141   // push
-142   put(Permitted_operands, "50", 0x00);
-143   put(Permitted_operands, "51", 0x00);
-144   put(Permitted_operands, "52", 0x00);
-145   put(Permitted_operands, "53", 0x00);
-146   put(Permitted_operands, "54", 0x00);
-147   put(Permitted_operands, "55", 0x00);
-148   put(Permitted_operands, "56", 0x00);
-149   put(Permitted_operands, "57", 0x00);
-150   // pop
-151   put(Permitted_operands, "58", 0x00);
-152   put(Permitted_operands, "59", 0x00);
-153   put(Permitted_operands, "5a", 0x00);
-154   put(Permitted_operands, "5b", 0x00);
-155   put(Permitted_operands, "5c", 0x00);
-156   put(Permitted_operands, "5d", 0x00);
-157   put(Permitted_operands, "5e", 0x00);
-158   put(Permitted_operands, "5f", 0x00);
-159   // return
-160   put(Permitted_operands, "c3", 0x00);
-161 
-162   //// Class B: just op and disp8
-163   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-164   //  0     0     0      |0       1     0     0
-165 
-166   // jump
-167   put(Permitted_operands, "eb", 0x04);
-168   put(Permitted_operands, "74", 0x04);
-169   put(Permitted_operands, "75", 0x04);
-170   put(Permitted_operands, "7c", 0x04);
-171   put(Permitted_operands, "7d", 0x04);
-172   put(Permitted_operands, "7e", 0x04);
-173   put(Permitted_operands, "7f", 0x04);
-174 
-175   //// Class C: just op and disp16
-176   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-177   //  0     0     0      |1       0     0     0
-178   put(Permitted_operands, "e9", 0x08);  // jump
-179 
-180   //// Class D: just op and disp32
-181   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-182   //  0     0     1      |0       0     0     0
-183   put(Permitted_operands, "e8", 0x10);  // call
-184 
-185   //// Class E: just op and imm8
-186   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-187   //  0     1     0      |0       0     0     0
-188   put(Permitted_operands, "cd", 0x20);  // software interrupt
-189 
-190   //// Class F: just op and imm32
-191   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-192   //  1     0     0      |0       0     0     0
-193   put(Permitted_operands, "05", 0x40);  // add
-194   put(Permitted_operands, "2d", 0x40);  // subtract
-195   put(Permitted_operands, "25", 0x40);  // and
-196   put(Permitted_operands, "0d", 0x40);  // or
-197   put(Permitted_operands, "35", 0x40);  // xor
-198   put(Permitted_operands, "3d", 0x40);  // compare
-199   put(Permitted_operands, "68", 0x40);  // push
-200   // copy
-201   put(Permitted_operands, "b8", 0x40);
-202   put(Permitted_operands, "b9", 0x40);
-203   put(Permitted_operands, "ba", 0x40);
-204   put(Permitted_operands, "bb", 0x40);
-205   put(Permitted_operands, "bc", 0x40);
-206   put(Permitted_operands, "bd", 0x40);
-207   put(Permitted_operands, "be", 0x40);
-208   put(Permitted_operands, "bf", 0x40);
-209 
-210   //// Class M: using ModR/M byte
-211   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-212   //  0     0     0      |0       0     0     1
-213 
-214   // add
-215   put(Permitted_operands, "01", 0x01);
-216   put(Permitted_operands, "03", 0x01);
-217   // subtract
-218   put(Permitted_operands, "29", 0x01);
-219   put(Permitted_operands, "2b", 0x01);
-220   // and
-221   put(Permitted_operands, "21", 0x01);
-222   put(Permitted_operands, "23", 0x01);
-223   // or
-224   put(Permitted_operands, "09", 0x01);
-225   put(Permitted_operands, "0b", 0x01);
-226   // complement
-227   put(Permitted_operands, "f7", 0x01);
-228   // xor
-229   put(Permitted_operands, "31", 0x01);
-230   put(Permitted_operands, "33", 0x01);
-231   // compare
-232   put(Permitted_operands, "39", 0x01);
-233   put(Permitted_operands, "3b", 0x01);
-234   // copy
-235   put(Permitted_operands, "89", 0x01);
-236   put(Permitted_operands, "8b", 0x01);
-237   // swap
-238   put(Permitted_operands, "87", 0x01);
-239   // pop
-240   put(Permitted_operands, "8f", 0x01);
-241 
-242   //// Class O: op, ModR/M and subop (not r32)
-243   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-244   //  0     0     0      |0       0     1     1
-245   put(Permitted_operands, "ff", 0x03);  // jump/push/call
-246 
-247   //// Class N: op, ModR/M and imm32
-248   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-249   //  1     0     0      |0       0     0     1
-250   put(Permitted_operands, "c7", 0x41);  // copy
-251 
-252   //// Class P: op, ModR/M, subop (not r32) and imm32
-253   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-254   //  1     0     0      |0       0     1     1
-255   put(Permitted_operands, "81", 0x43);  // combine
-256 
-257   // End Init Permitted Operands
-258 }
-259 
-260 :(code)
-261 #define HAS(bitvector, bit)  ((bitvector) & (1 << (bit)))
-262 #define SET(bitvector, bit)  ((bitvector) | (1 << (bit)))
-263 #define CLEAR(bitvector, bit)  ((bitvector) & (~(1 << (bit))))
-264 
-265 void check_operands(const line& inst, const word& op) {
-266   if (!is_hex_byte(op)) return;
-267   uint8_t expected_bitvector = get(Permitted_operands, op.data);
-268   if (HAS(expected_bitvector, MODRM)) {
-269     check_operands_modrm(inst, op);
-270     compare_bitvector_modrm(inst, expected_bitvector, op);
-271   }
-272   else {
-273     compare_bitvector(inst, expected_bitvector, op);
-274   }
-275 }
-276 
-277 //: Many instructions can be checked just by comparing bitvectors.
-278 
-279 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {
-280   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
-281   uint8_t bitvector = compute_operand_bitvector(inst);
-282   if (trace_contains_errors()) return;  // duplicate operand type
-283   if (bitvector == expected) return;  // all good with this instruction
-284   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
-285 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
-286     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
-287     const string& optype = Operand_type_name.at(i);
-288     if ((bitvector & 0x1) > (expected & 0x1))
-289       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
-290     else
-291       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
-292     // continue giving all errors for a single instruction
-293   }
-294   // ignore settings in any unused bits
-295 }
-296 
-297 string maybe_name(const word& op) {
-298   if (!is_hex_byte(op)) return "";
-299   if (!contains_key(name, op.data)) return "";
-300   return " ("+get(name, op.data)+')';
-301 }
-302 
-303 bool is_hex_byte(const word& curr) {
-304   if (contains_any_operand_metadata(curr))
-305     return false;
-306   if (SIZE(curr.data) != 2)
-307     return false;
-308   if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
-309     return false;
-310   return true;
-311 }
-312 
-313 uint32_t compute_operand_bitvector(const line& inst) {
-314   uint32_t bitvector = 0;
-315   for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {
-316     bitvector = bitvector | bitvector_for_operand(inst.words.at(i));
-317     if (trace_contains_errors()) return INVALID_OPERANDS;  // duplicate operand type
-318   }
-319   return bitvector;
-320 }
-321 
-322 bool has_operands(const line& inst) {
-323   return SIZE(inst.words) > first_operand(inst);
-324 }
-325 
-326 int first_operand(const line& inst) {
-327   if (inst.words.at(0).data == "0f") return 2;
-328   if (inst.words.at(0).data == "f3") {
-329     if (inst.words.at(1).data == "0f")
-330       return 3;
-331     else
-332       return 2;
-333   }
-334   return 1;
-335 }
-336 
-337 bool all_hex_bytes(const line& inst) {
-338   for (int i = 0;  i < SIZE(inst.words);  ++i)
-339     if (!is_hex_byte(inst.words.at(i)))
-340       return false;
-341   return true;
-342 }
-343 
-344 bool contains_any_operand_metadata(const word& word) {
-345   for (int i = 0;  i < SIZE(word.metadata);  ++i)
-346     if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
-347       return true;
-348   return false;
-349 }
-350 
-351 // Scan the metadata of 'w' and return the bit corresponding to any operand type.
-352 // Also raise an error if metadata contains multiple operand types.
-353 uint32_t bitvector_for_operand(const word& w) {
-354   uint32_t bv = 0;
-355   bool found = false;
-356   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
-357     const string& curr = w.metadata.at(i);
-358     if (!contains_key(Operand_type, curr)) continue;  // ignore unrecognized metadata
-359     if (found) {
-360       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
-361       return INVALID_OPERANDS;
-362     }
-363     bv = (1 << get(Operand_type, curr));
-364     found = true;
-365   }
-366   return bv;
-367 }
-368 
-369 :(scenario conflicting_operand_type)
-370 % Hide_errors = true;
-371 == 0x1
-372 cd/software-interrupt 80/imm8/imm32
-373 +error: '80/imm8/imm32' has conflicting operand types; it should have only one
-374 
-375 //: Instructions computing effective addresses have more complex rules, so
-376 //: we'll hard-code a common set of instruction-decoding rules.
-377 
-378 :(scenario check_missing_mod_operand)
-379 % Hide_errors = true;
-380 == 0x1
-381 81 0/add/subop       3/rm32/ebx 1/imm32
-382 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
-383 
-384 :(before "End Globals")
-385 set<string> Instruction_operands;
-386 :(before "End One-time Setup")
-387 Instruction_operands.insert("subop");
-388 Instruction_operands.insert("mod");
-389 Instruction_operands.insert("rm32");
-390 Instruction_operands.insert("base");
-391 Instruction_operands.insert("index");
-392 Instruction_operands.insert("scale");
-393 Instruction_operands.insert("r32");
-394 Instruction_operands.insert("disp8");
-395 Instruction_operands.insert("disp16");
-396 Instruction_operands.insert("disp32");
-397 Instruction_operands.insert("imm8");
-398 Instruction_operands.insert("imm32");
-399 
-400 :(code)
-401 void check_operands_modrm(const line& inst, const word& op) {
-402   if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
-403   check_metadata_present(inst, "mod", op);
-404   check_metadata_present(inst, "rm32", op);
-405   // no check for r32; some instructions don't use it; just assume it's 0 if missing
-406   if (op.data == "81" || op.data == "8f" || op.data == "ff") {  // keep sync'd with 'help subop'
-407     check_metadata_present(inst, "subop", op);
-408     check_metadata_absent(inst, "r32", op, "should be replaced by subop");
-409   }
-410   if (trace_contains_errors()) return;
-411   if (metadata(inst, "rm32").data != "4") return;
-412   // SIB byte checks
-413   uint8_t mod = hex_byte(metadata(inst, "mod").data);
-414   if (mod != /*direct*/3) {
-415     check_metadata_present(inst, "base", op);
-416     check_metadata_present(inst, "index", op);  // otherwise why go to SIB?
-417   }
-418   else {
-419     check_metadata_absent(inst, "base", op, "direct mode");
-420     check_metadata_absent(inst, "index", op, "direct mode");
-421   }
-422   // no check for scale; 0 (2**0 = 1) by default
-423 }
-424 
-425 // same as compare_bitvector, with a couple of exceptions for modrm-based instructions
-426 //   exception 1: ignore modrm bit since we already checked it above
-427 //   exception 2: modrm instructions can use a displacement on occasion
-428 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) {
-429   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
-430   uint8_t bitvector = compute_operand_bitvector(inst);
-431   if (trace_contains_errors()) return;  // duplicate operand type
-432   expected = CLEAR(expected, MODRM);  // exception 1
-433   if (bitvector == expected) return;  // all good with this instruction
-434   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
-435 //?     cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
-436     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
-437     if (i == DISP8 || i == DISP32) continue;  // exception 2
-438     const string& optype = Operand_type_name.at(i);
-439     if ((bitvector & 0x1) > (expected & 0x1))
-440       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
-441     else
-442       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
-443     // continue giving all errors for a single instruction
-444   }
-445   // ignore settings in any unused bits
-446 }
-447 
-448 void check_metadata_present(const line& inst, const string& type, const word& op) {
-449   if (!has_metadata(inst, type))
-450     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end();
-451 }
-452 
-453 void check_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
-454   if (has_metadata(inst, type))
-455     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end();
-456 }
-457 
-458 bool has_metadata(const line& inst, const string& m) {
-459   bool result = false;
-460   for (int i = 0;  i < SIZE(inst.words);  ++i) {
-461     if (!has_metadata(inst.words.at(i), m)) continue;
-462     if (result) {
-463       raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
-464       return false;
-465     }
-466     result = true;
-467   }
-468   return result;
-469 }
-470 
-471 bool has_metadata(const word& w, const string& m) {
-472   bool result = false;
-473   bool metadata_found = false;
-474   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
-475     const string& curr = w.metadata.at(i);
-476     if (!contains_key(Instruction_operands, curr)) continue;  // ignore unrecognized metadata
-477     if (metadata_found) {
-478       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
-479       return false;
-480     }
-481     metadata_found = true;
-482     result = (curr == m);
-483   }
-484   return result;
-485 }
-486 
-487 word metadata(const line& inst, const string& m) {
-488   for (int i = 0;  i < SIZE(inst.words);  ++i)
-489     if (has_metadata(inst.words.at(i), m))
-490       return inst.words.at(i);
-491   assert(false);
-492 }
-493 
-494 :(scenarios transform)
-495 :(scenario modrm_with_displacement)
-496 % Reg[EAX].u = 0x1;
-497 == 0x1
-498 # just avoid null pointer
-499 8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8  # copy *(EAX+4) to EDX
-500 $error: 0
-501 :(scenarios run)
-502 
-503 //: helper for scenario
-504 :(code)
-505 void transform(const string& text_bytes) {
-506   program p;
-507   istringstream in(text_bytes);
-508   parse(in, p);
-509   if (trace_contains_errors()) return;
-510   transform(p);
-511 }
-512 
-513 :(scenario conflicting_operands_in_modrm_instruction)
-514 % Hide_errors = true;
-515 == 0x1
-516 01/add 0/mod 3/mod
-517 +error: '01/add 0/mod 3/mod' has conflicting mod operands
-518 
-519 :(scenario conflicting_operand_type_modrm)
-520 % Hide_errors = true;
-521 == 0x1
-522 01/add 0/mod 3/rm32/r32
-523 +error: '3/rm32/r32' has conflicting operand types; it should have only one
-524 
-525 :(scenario check_missing_rm32_operand)
-526 % Hide_errors = true;
-527 == 0x1
-528 81 0/add/subop 0/mod            1/imm32
-529 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
-530 
-531 :(scenario check_missing_subop_operand)
-532 % Hide_errors = true;
-533 == 0x1
-534 81             0/mod 3/rm32/ebx 1/imm32
-535 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
-536 
-537 :(scenario check_missing_base_operand)
-538 % Hide_errors = true;
-539 == 0x1
-540 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
-541 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
-542 
-543 :(scenario check_missing_index_operand)
-544 % Hide_errors = true;
-545 == 0x1
-546 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
-547 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
-548 
-549 :(scenario check_missing_base_operand_2)
-550 % Hide_errors = true;
-551 == 0x1
-552 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
-553 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
-554 
-555 :(scenario check_base_operand_not_needed_in_direct_mode)
-556 == 0x1
-557 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32
-558 $error: 0
-559 
-560 //:: similarly handle multi-byte opcodes
-561 
-562 :(code)
-563 void check_operands_0f(const line& inst) {
-564   assert(inst.words.at(0).data == "0f");
-565   if (SIZE(inst.words) == 1) {
-566     raise << "opcode '0f' requires a second opcode\n" << end();
-567     return;
-568   }
-569   word op = preprocess_op(inst.words.at(1));
-570   if (!contains_key(name_0f, op.data)) {
-571     raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
-572     return;
-573   }
-574   check_operands_0f(inst, op);
-575 }
-576 
-577 void check_operands_f3(const line& /*unused*/) {
-578   raise << "no supported opcodes starting with f3\n" << end();
-579 }
-580 
-581 :(scenario check_missing_disp16_operand)
-582 % Hide_errors = true;
-583 == 0x1
-584 # instruction                     effective address                                                   operand     displacement    immediate
-585 # op          subop               mod             rm32          base        index         scale       r32
-586 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-587   0f 84                                                                                                                                             # jmp if ZF to ??
-588 +error: '0f 84' (jump disp16 bytes away if ZF is set): missing disp16 operand
-589 
-590 :(before "End Globals")
-591 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f;
-592 :(before "End Init Permitted Operands")
-593 //// Class C: just op and disp16
-594 //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-595 //  0     0     0      |1       0     0     0
-596 put(Permitted_operands_0f, "84", 0x08);
-597 put(Permitted_operands_0f, "85", 0x08);
-598 put(Permitted_operands_0f, "8c", 0x08);
-599 put(Permitted_operands_0f, "8d", 0x08);
-600 put(Permitted_operands_0f, "8e", 0x08);
-601 put(Permitted_operands_0f, "8f", 0x08);
-602 
-603 //// Class M: using ModR/M byte
-604 //  imm32 imm8  disp32 |disp16  disp8 subop modrm
-605 //  0     0     0      |0       0     0     1
-606 put(Permitted_operands_0f, "af", 0x01);
-607 
-608 :(code)
-609 void check_operands_0f(const line& inst, const word& op) {
-610   uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
-611   if (HAS(expected_bitvector, MODRM))
-612     check_operands_modrm(inst, op);
-613   compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);
-614 }
-615 
-616 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
-617   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
-618   uint8_t bitvector = compute_operand_bitvector(inst);
-619   if (trace_contains_errors()) return;  // duplicate operand type
-620   if (bitvector == expected) return;  // all good with this instruction
-621   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
-622 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
-623     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
-624     const string& optype = Operand_type_name.at(i);
-625     if ((bitvector & 0x1) > (expected & 0x1))
-626       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end();
-627     else
-628       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end();
-629     // continue giving all errors for a single instruction
-630   }
-631   // ignore settings in any unused bits
-632 }
-633 
-634 string to_string(const line& inst) {
-635   ostringstream out;
-636   for (int i = 0;  i < SIZE(inst.words);  ++i) {
-637     if (i > 0) out << ' ';
-638     out << inst.words.at(i).original;
-639   }
-640   return out.str();
-641 }
-642 
// Return a lowercased copy of the NUL-terminated string 's'.
string tolower(const char* s) {
  string result;
  for (/*nada*/;  *s;  ++s) {
    // The character-level tolower() is only defined for EOF and values
    // representable as unsigned char, so convert before calling it; a plain
    // (possibly negative) char would be undefined behaviour for bytes >= 0x80.
    result += static_cast<char>(tolower(static_cast<unsigned char>(*s)));
  }
  return result;
}
-649 
-650 #undef HAS
-651 #undef SET
-652 #undef CLEAR
-653 
-654 //:: docs on each operand type
-655 
-656 :(before "End Help Texts")
-657 init_operand_type_help();
-658 :(code)
-659 void init_operand_type_help() {
-660   put(Help, "mod",
-661     "2-bit operand controlling the _addressing mode_ of many instructions,\n"
-662     "to determine how to compute the _effective address_ to look up memory at\n"
-663     "based on the 'rm32' operand and potentially others.\n"
-664     "\n"
-665     "If mod = 3, just operate on the contents of the register specified by rm32\n"
-666     "            (direct mode).\n"
-667     "If mod = 2, effective address is usually* rm32 + disp32\n"
-668     "            (indirect mode with displacement).\n"
-669     "If mod = 1, effective address is usually* rm32 + disp8\n"
-670     "            (indirect mode with displacement).\n"
-671     "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
-672     "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
-673     "     Using it as an address gets more involved. For more details,\n"
-674     "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
-675     "\n"
-676     "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
-677     "\"32-bit addressing forms with the ModR/M byte\".\n"
-678     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-679   );
-680   put(Help, "subop",
-681     "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
-682     "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
-683   );
-684   put(Help, "r32",
-685     "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
-686   );
-687   put(Help, "rm32",
-688     "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
-689     "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
-690     "\"32-bit addressing forms with the ModR/M byte\".\n"
-691     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-692   );
-693   put(Help, "base",
-694     "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
-695     "This address may be further modified by 'index' and 'scale' operands.\n"
-696     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
-697     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
-698     "\"32-bit addressing forms with the SIB byte\".\n"
-699     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-700   );
-701   put(Help, "index",
-702     "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
-703     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
-704     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
-705     "\"32-bit addressing forms with the SIB byte\".\n"
-706     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-707   );
-708   put(Help, "scale",
-709     "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
-710     "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
-711     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
-712     "\"32-bit addressing forms with the SIB byte\".\n"
-713     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-714   );
-715   put(Help, "disp8",
-716     "8-bit value to be added in many instructions.\n"
-717   );
-718   put(Help, "disp16",
-719     "16-bit value to be added in many instructions.\n"
-720   );
-721   put(Help, "disp32",
-722     "32-bit value to be added in many instructions.\n"
-723   );
-724   put(Help, "imm8",
-725     "8-bit value for many instructions.\n"
-726   );
-727   put(Help, "imm32",
-728     "32-bit value for many instructions.\n"
-729   );
-730 }
-731 
-732 :(before "End Includes")
-733 #include<cctype>
-

- - - diff --git a/html/subx/031check_operand_bounds.cc.html b/html/subx/031check_operand_bounds.cc.html deleted file mode 100644 index b484db41..00000000 --- a/html/subx/031check_operand_bounds.cc.html +++ /dev/null @@ -1,141 +0,0 @@ - - - - -Mu - subx/031check_operand_bounds.cc - - - - - - - - - - -

- 1 //:: Check that the different operands of an instruction aren't too large for their bitfields.
- 2 
- 3 :(scenario check_bitfield_sizes)
- 4 % Hide_errors = true;
- 5 == 0x1
- 6 01/add 4/mod
- 7 +error: '4/mod' too large to fit in bitfield mod
- 8 
- 9 :(before "End Globals")
-10 map<string, uint32_t> Operand_bound;
-11 :(before "End One-time Setup")
-12 put(Operand_bound, "subop", 1<<3);
-13 put(Operand_bound, "mod", 1<<2);
-14 put(Operand_bound, "rm32", 1<<3);
-15 put(Operand_bound, "base", 1<<3);
-16 put(Operand_bound, "index", 1<<3);
-17 put(Operand_bound, "scale", 1<<2);
-18 put(Operand_bound, "r32", 1<<3);
-19 put(Operand_bound, "disp8", 1<<8);
-20 put(Operand_bound, "disp16", 1<<16);
-21 // no bound needed for disp32
-22 put(Operand_bound, "imm8", 1<<8);
-23 // no bound needed for imm32
-24 
-25 :(before "End Transforms")
-26 Transform.push_back(check_operand_bounds);
-27 :(code)
-28 void check_operand_bounds(/*const*/ program& p) {  // Transform: bounds-check every operand word in the first segment of 'p'.
-29   trace(99, "transform") << "-- check operand bounds" << end();
-30   if (p.segments.empty()) return;
-31   const segment& code = p.segments.at(0);  // only segment 0 is examined here
-32   for (int i = 0;  i < SIZE(code.lines);  ++i) {
-33     const line& inst = code.lines.at(i);
-34     for (int j = first_operand(inst);  j < SIZE(inst.words);  ++j)  // skip the opcode word(s)
-35       check_operand_bounds(inst.words.at(j));
-36     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
-37   }
-38 }
-39 
-40 void check_operand_bounds(const word& w) {  // Raise an error if w's value doesn't fit a bitfield named by one of its metadata tags.
-41   for (map<string, uint32_t>::iterator p = Operand_bound.begin();  p != Operand_bound.end();  ++p) {
-42     if (!has_metadata(w, p->first)) continue;
-43     if (!is_hex_int(w.data)) continue;  // later transforms are on their own to do their own bounds checking
-44     int32_t x = parse_int(w.data);
-45     if (x >= 0) {
-46       if (static_cast<uint32_t>(x) >= p->second)  // non-negative values must be below the bound (1 << field width)
-47         raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
-48     }
-49     else {
-50       // hacky? assuming bound is a power of 2
-51       if (x < -1*static_cast<int32_t>(p->second/2))  // negative values must be at least -(bound/2)
-52         raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
-53     }
-54   }
-55 }
-56 
// Report whether 's' looks like a hexadecimal integer: an optional sign, an
// optional "0x" prefix, and after that nothing but hex digits.
// Note: only non-hex characters are rejected. A string with no digits after
// the sign/prefix (e.g. "-" or "0x") still passes, exactly as before.
bool is_hex_int(const std::string& s) {
  if (s.empty()) return false;
  size_t pos = 0;
  if (s.at(0) == '-' || s.at(0) == '+') pos++;
  // compare() takes (position, length). The previous substr(pos, pos+2) asked
  // for pos+2 characters, so after a sign it read "0x1" instead of "0x" and
  // signed values such as "-0x1" were wrongly rejected.
  if (s.compare(pos, 2, "0x") == 0) pos += 2;
  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == std::string::npos;
}
-64 
-65 int32_t parse_int(const string& s) {  // Parse 's' as a hexadecimal integer; on failure raise "not a number" and return 0.
-66   istringstream in(s);
-67   int32_t result = 0;
-68   in >> std::hex >> result;  // always base 16, whether or not 's' carries a "0x" prefix
-69   if (!in || !in.eof()) {  // extraction failed, or characters were left over after the number
-70     raise << "not a number: " << s << '\n' << end();
-71     return 0;
-72   }
-73   return result;
-74 }
-

- - - diff --git a/html/subx/031check_operands.cc.html b/html/subx/031check_operands.cc.html new file mode 100644 index 00000000..09f55a7c --- /dev/null +++ b/html/subx/031check_operands.cc.html @@ -0,0 +1,588 @@ + + + + +Mu - subx/031check_operands.cc + + + + + + + + + + +

+  1 //: Since we're tagging operands with their types, let's start checking these
+  2 //: operand types for each instruction.
+  3 
+  4 :(scenario check_missing_imm8_operand)
+  5 % Hide_errors = true;
+  6 == 0x1
+  7 # instruction                     effective address                                                   operand     displacement    immediate
+  8 # op          subop               mod             rm32          base        index         scale       r32
+  9 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+ 10   cd                                                                                                                                                # int ??
+ 11 +error: 'cd' (software interrupt): missing imm8 operand
+ 12 
+ 13 :(before "Pack Operands(segment code)")
+ 14 check_operands(code);
+ 15 if (trace_contains_errors()) return;
+ 16 
+ 17 :(code)
+ 18 void check_operands(const segment& code) {  // Check the operands of every instruction in 'code', one line at a time.
+ 19   trace(99, "transform") << "-- check operands" << end();
+ 20   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+ 21     check_operands(code.lines.at(i));  // the per-instruction overload taking a 'line'
+ 22     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
+ 23   }
+ 24 }
+ 25 
+ 26 void check_operands(const line& inst) {  // Dispatch on the normalized first word: '0f' and 'f3' have their own checkers.
+ 27   word op = preprocess_op(inst.words.at(0));  // presumably every line has at least one word -- TODO confirm
+ 28   if (op.data == "0f") {
+ 29     check_operands_0f(inst);
+ 30     return;
+ 31   }
+ 32   if (op.data == "f3") {
+ 33     check_operands_f3(inst);
+ 34     return;
+ 35   }
+ 36   check_operands(inst, op);  // everything else is handled as a single-byte opcode
+ 37 }
+ 38 
+ 39 word preprocess_op(word/*copy*/ op) {  // Normalize an opcode word: lowercase its data, drop a leading "0x", left-pad a single character with '0'.
+ 40   op.data = tolower(op.data.c_str());
+ 41   // opcodes can't be negative
+ 42   if (starts_with(op.data, "0x"))
+ 43     op.data = op.data.substr(2);
+ 44   if (SIZE(op.data) == 1)
+ 45     op.data = string("0")+op.data;  // so that "f" and "0f" compare equal
+ 46   return op;  // only the by-value copy was modified; the caller's word is untouched
+ 47 }
+ 48 
+ 49 void test_preprocess_op() {  // Unit test: "0xf" and "0f" must normalize to the same opcode data.
+ 50   word w1;  w1.data = "0xf";
+ 51   word w2;  w2.data = "0f";
+ 52   CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data);
+ 53 }
+ 54 
+ 55 //: To check the operands for an opcode, we'll track the permitted operands
+ 56 //: for each supported opcode in a bitvector. That way we can often compute the
+ 57 //: bitvector for each instruction's operands and compare it with the expected.
+ 58 
+ 59 :(before "End Types")
+ 60 enum operand_type {  // Each enumerator doubles as a bit position in an operand bitvector (code below shifts 1 << type).
+ 61   // start from the least significant bit
+ 62   MODRM,  // more complex, may also involve disp8 or disp32
+ 63   SUBOP,
+ 64   DISP8,
+ 65   DISP16,
+ 66   DISP32,
+ 67   IMM8,
+ 68   IMM32,
+ 69   NUM_OPERAND_TYPES  // count of the enumerators above; used as a loop bound and table size
+ 70 };
+ 71 :(before "End Globals")
+ 72 vector<string> Operand_type_name;
+ 73 map<string, operand_type> Operand_type;
+ 74 :(before "End One-time Setup")
+ 75 init_op_types();
+ 76 :(code)
+ 77 void init_op_types() {  // Fill Operand_type_name (enum -> name) and Operand_type (name -> enum) using lowercased enumerator names.
+ 78   assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8);  // bitvectors are stored in uint8_t, one bit per type
+ 79   Operand_type_name.resize(NUM_OPERAND_TYPES);
+ 80   #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type);
+ 81   DEF(MODRM);
+ 82   DEF(SUBOP);
+ 83   DEF(DISP8);
+ 84   DEF(DISP16);
+ 85   DEF(DISP32);
+ 86   DEF(IMM8);
+ 87   DEF(IMM32);
+ 88   #undef DEF
+ 89 }
+ 90 
+ 91 :(before "End Globals")
+ 92 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands;
+ 93 const uint8_t INVALID_OPERANDS = 0xff;  // no instruction uses all the operand types
+ 94 :(before "End One-time Setup")
+ 95 init_permitted_operands();
+ 96 :(code)
+ 97 void init_permitted_operands() {  // Map each supported single-byte opcode to the bitvector of operand types it takes (bit positions follow enum operand_type).
+ 98   //// Class A: just op, no operands
+ 99   // halt
+100   put(Permitted_operands, "f4", 0x00);
+101   // push
+102   put(Permitted_operands, "50", 0x00);
+103   put(Permitted_operands, "51", 0x00);
+104   put(Permitted_operands, "52", 0x00);
+105   put(Permitted_operands, "53", 0x00);
+106   put(Permitted_operands, "54", 0x00);
+107   put(Permitted_operands, "55", 0x00);
+108   put(Permitted_operands, "56", 0x00);
+109   put(Permitted_operands, "57", 0x00);
+110   // pop
+111   put(Permitted_operands, "58", 0x00);
+112   put(Permitted_operands, "59", 0x00);
+113   put(Permitted_operands, "5a", 0x00);
+114   put(Permitted_operands, "5b", 0x00);
+115   put(Permitted_operands, "5c", 0x00);
+116   put(Permitted_operands, "5d", 0x00);
+117   put(Permitted_operands, "5e", 0x00);
+118   put(Permitted_operands, "5f", 0x00);
+119   // return
+120   put(Permitted_operands, "c3", 0x00);
+121 
+122   //// Class B: just op and disp8
+123   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+124   //  0     0     0      |0       1     0     0
+125 
+126   // jump
+127   put(Permitted_operands, "eb", 0x04);
+128   put(Permitted_operands, "74", 0x04);
+129   put(Permitted_operands, "75", 0x04);
+130   put(Permitted_operands, "7c", 0x04);
+131   put(Permitted_operands, "7d", 0x04);
+132   put(Permitted_operands, "7e", 0x04);
+133   put(Permitted_operands, "7f", 0x04);
+134 
+135   //// Class C: just op and disp16
+136   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+137   //  0     0     0      |1       0     0     0
+138   put(Permitted_operands, "e9", 0x08);  // jump
+139 
+140   //// Class D: just op and disp32
+141   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+142   //  0     0     1      |0       0     0     0
+143   put(Permitted_operands, "e8", 0x10);  // call
+144 
+145   //// Class E: just op and imm8
+146   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+147   //  0     1     0      |0       0     0     0
+148   put(Permitted_operands, "cd", 0x20);  // software interrupt
+149 
+150   //// Class F: just op and imm32
+151   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+152   //  1     0     0      |0       0     0     0
+153   put(Permitted_operands, "05", 0x40);  // add
+154   put(Permitted_operands, "2d", 0x40);  // subtract
+155   put(Permitted_operands, "25", 0x40);  // and
+156   put(Permitted_operands, "0d", 0x40);  // or
+157   put(Permitted_operands, "35", 0x40);  // xor
+158   put(Permitted_operands, "3d", 0x40);  // compare
+159   put(Permitted_operands, "68", 0x40);  // push
+160   // copy
+161   put(Permitted_operands, "b8", 0x40);
+162   put(Permitted_operands, "b9", 0x40);
+163   put(Permitted_operands, "ba", 0x40);
+164   put(Permitted_operands, "bb", 0x40);
+165   put(Permitted_operands, "bc", 0x40);
+166   put(Permitted_operands, "bd", 0x40);
+167   put(Permitted_operands, "be", 0x40);
+168   put(Permitted_operands, "bf", 0x40);
+169 
+170   //// Class M: using ModR/M byte
+171   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+172   //  0     0     0      |0       0     0     1
+173 
+174   // add
+175   put(Permitted_operands, "01", 0x01);
+176   put(Permitted_operands, "03", 0x01);
+177   // subtract
+178   put(Permitted_operands, "29", 0x01);
+179   put(Permitted_operands, "2b", 0x01);
+180   // and
+181   put(Permitted_operands, "21", 0x01);
+182   put(Permitted_operands, "23", 0x01);
+183   // or
+184   put(Permitted_operands, "09", 0x01);
+185   put(Permitted_operands, "0b", 0x01);
+186   // complement
+187   put(Permitted_operands, "f7", 0x01);
+188   // xor
+189   put(Permitted_operands, "31", 0x01);
+190   put(Permitted_operands, "33", 0x01);
+191   // compare
+192   put(Permitted_operands, "39", 0x01);
+193   put(Permitted_operands, "3b", 0x01);
+194   // copy
+195   put(Permitted_operands, "89", 0x01);
+196   put(Permitted_operands, "8b", 0x01);
+197   // swap
+198   put(Permitted_operands, "87", 0x01);
+199   // pop
+200   put(Permitted_operands, "8f", 0x03);  // modrm + subop, like Class O below: check_operands_modrm demands a subop for 8f, and 0x01 made that subop an 'unexpected' operand
+201 
+202   //// Class O: op, ModR/M and subop (not r32)
+203   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+204   //  0     0     0      |0       0     1     1
+205   put(Permitted_operands, "ff", 0x03);  // jump/push/call
+206 
+207   //// Class N: op, ModR/M and imm32
+208   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+209   //  1     0     0      |0       0     0     1
+210   put(Permitted_operands, "c7", 0x41);  // copy
+211 
+212   //// Class P: op, ModR/M, subop (not r32) and imm32
+213   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+214   //  1     0     0      |0       0     1     1
+215   put(Permitted_operands, "81", 0x43);  // combine
+216 
+217   // End Init Permitted Operands
+218 }
+219 
+220 :(code)
+221 #define HAS(bitvector, bit)  ((bitvector) & (1 << (bit)))  // nonzero iff bit number 'bit' is set
+222 #define SET(bitvector, bit)  ((bitvector) | (1 << (bit)))  // yields the value with 'bit' on; does not modify its argument
+223 #define CLEAR(bitvector, bit)  ((bitvector) & (~(1 << (bit))))  // yields the value with 'bit' off; does not modify its argument
+224 
+225 void check_operands(const line& inst, const word& op) {  // Check one single-byte-opcode instruction against its entry in Permitted_operands.
+226   if (!is_hex_byte(op)) return;  // nothing is checked when the opcode word isn't a hex byte
+227   uint8_t expected_bitvector = get(Permitted_operands, op.data);  // NOTE(review): no contains_key() guard; presumably get() needs the opcode to be registered -- confirm
+228   if (HAS(expected_bitvector, MODRM)) {
+229     check_operands_modrm(inst, op);  // mod/rm32/subop/SIB rules first
+230     compare_bitvector_modrm(inst, expected_bitvector, op);  // then the remaining operand types
+231   }
+232   else {
+233     compare_bitvector(inst, expected_bitvector, op);
+234   }
+235 }
+236 
+237 //: Many instructions can be checked just by comparing bitvectors.
+238 
+239 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {  // Raise one error per operand type that is present but not permitted, or permitted but absent.
+240   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
+241   uint8_t bitvector = compute_operand_bitvector(inst);
+242   if (trace_contains_errors()) return;  // duplicate operand type
+243   if (bitvector == expected) return;  // all good with this instruction
+244   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {  // both vectors shift each pass, so bit 0 always belongs to operand type i
+245 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
+246     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
+247     const string& optype = Operand_type_name.at(i);
+248     if ((bitvector & 0x1) > (expected & 0x1))  // 1 > 0: the instruction has it but the opcode doesn't take it
+249       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
+250     else
+251       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
+252     // continue giving all errors for a single instruction
+253   }
+254   // ignore settings in any unused bits
+255 }
+256 
+257 string maybe_name(const word& op) {  // Return " (<name>)" for an opcode found in 'name', or "" so an error message can simply omit it.
+258   if (!is_hex_byte(op)) return "";
+259   if (!contains_key(name, op.data)) return "";
+260   return " ("+get(name, op.data)+')';  // leading space included, so callers append it directly after the quoted instruction
+261 }
+262 
+263 uint32_t compute_operand_bitvector(const line& inst) {  // OR together the operand-type bits of every word after the first; INVALID_OPERANDS once an error has been raised.
+264   uint32_t bitvector = 0;
+265   for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {  // starts at word 1 even for multi-word opcodes; a word without operand-type metadata contributes 0
+266     bitvector = bitvector | bitvector_for_operand(inst.words.at(i));
+267     if (trace_contains_errors()) return INVALID_OPERANDS;  // duplicate operand type
+268   }
+269   return bitvector;
+270 }
+271 
+272 bool has_operands(const line& inst) {  // True if the line has any word beyond its opcode word(s).
+273   return SIZE(inst.words) > first_operand(inst);
+274 }
+275 
+276 int first_operand(const line& inst) {  // Index of the first word after the opcode, which spans 1-3 words depending on 0f/f2/f3 prefixes.
+277   if (inst.words.at(0).data == "0f") return 2;
+278   if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") {
+279     if (SIZE(inst.words) > 1 && inst.words.at(1).data == "0f")  // size guard: a line holding only 'f2'/'f3' must not index past its last word
+280       return 3;
+281     else
+282       return 2;
+283   }
+284   return 1;
+285 }
+286 
+287 // Scan the metadata of 'w' and return the bit corresponding to any operand type.
+288 // Also raise an error if metadata contains multiple operand types.
+289 uint32_t bitvector_for_operand(const word& w) {
+290   uint32_t bv = 0;  // stays 0 when no metadata entry names an operand type
+291   bool found = false;
+292   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
+293     const string& curr = w.metadata.at(i);
+294     if (!contains_key(Operand_type, curr)) continue;  // ignore unrecognized metadata
+295     if (found) {  // a second recognized type on the same word
+296       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
+297       return INVALID_OPERANDS;
+298     }
+299     bv = (1 << get(Operand_type, curr));  // the enum value is the bit position
+300     found = true;
+301   }
+302   return bv;
+303 }
+304 
+305 :(scenario conflicting_operand_type)
+306 % Hide_errors = true;
+307 == 0x1
+308 cd/software-interrupt 80/imm8/imm32
+309 +error: '80/imm8/imm32' has conflicting operand types; it should have only one
+310 
+311 //: Instructions computing effective addresses have more complex rules, so
+312 //: we'll hard-code a common set of instruction-decoding rules.
+313 
+314 :(scenario check_missing_mod_operand)
+315 % Hide_errors = true;
+316 == 0x1
+317 81 0/add/subop       3/rm32/ebx 1/imm32
+318 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
+319 
+320 :(code)
+321 void check_operands_modrm(const line& inst, const word& op) {  // Check the ModR/M-related metadata (mod, rm32, subop, base, index) of one instruction.
+322   if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
+323   check_metadata_present(inst, "mod", op);
+324   check_metadata_present(inst, "rm32", op);
+325   // no check for r32; some instructions don't use it; just assume it's 0 if missing
+326   if (op.data == "81" || op.data == "8f" || op.data == "ff") {  // keep sync'd with 'help subop'
+327     check_metadata_present(inst, "subop", op);
+328     check_metadata_absent(inst, "r32", op, "should be replaced by subop");
+329   }
+330   if (trace_contains_errors()) return;  // mod and rm32 are only read below once known to be present
+331   if (metadata(inst, "rm32").data != "4") return;  // NOTE(review): exact string match, so "04" or "0x4" would skip the SIB checks -- confirm operands are normalized
+332   // SIB byte checks
+333   uint8_t mod = hex_byte(metadata(inst, "mod").data);
+334   if (mod != /*direct*/3) {
+335     check_metadata_present(inst, "base", op);
+336     check_metadata_present(inst, "index", op);  // otherwise why go to SIB?
+337   }
+338   else {
+339     check_metadata_absent(inst, "base", op, "direct mode");
+340     check_metadata_absent(inst, "index", op, "direct mode");
+341   }
+342   // no check for scale; 0 (2**0 = 1) by default
+343 }
+344 
+345 // same as compare_bitvector, with a couple of exceptions for modrm-based instructions
+346 //   exception 1: ignore modrm bit since we already checked it above
+347 //   exception 2: modrm instructions can use a displacement on occasion
+348 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) {
+349   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
+350   uint8_t bitvector = compute_operand_bitvector(inst);
+351   if (trace_contains_errors()) return;  // duplicate operand type
+352   expected = CLEAR(expected, MODRM);  // exception 1
+353   if (bitvector == expected) return;  // all good with this instruction
+354   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {  // both vectors shift each pass, so bit 0 always belongs to operand type i
+355 //?     cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
+356     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
+357     if (i == DISP8 || i == DISP32) continue;  // exception 2
+358     const string& optype = Operand_type_name.at(i);
+359     if ((bitvector & 0x1) > (expected & 0x1))  // 1 > 0: the instruction has it but the opcode doesn't take it
+360       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
+361     else
+362       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
+363     // continue giving all errors for a single instruction
+364   }
+365   // ignore settings in any unused bits
+366 }
+367 
+368 void check_metadata_present(const line& inst, const string& type, const word& op) {  // Raise a "missing <type> operand" error unless has_metadata(inst, type) holds.
+369   if (!has_metadata(inst, type))
+370     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end();  // NOTE(review): looks in 'name' even when reached via check_operands_0f, whose opcodes are named in 'name_0f' -- confirm
+371 }
+372 
+373 void check_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {  // Raise an "unexpected <type> operand (<msg>)" error if has_metadata(inst, type) holds.
+374   if (has_metadata(inst, type))
+375     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end();  // NOTE(review): looks in 'name' even when reached via check_operands_0f, whose opcodes are named in 'name_0f' -- confirm
+376 }
+377 
+378 :(scenarios transform)
+379 :(scenario modrm_with_displacement)
+380 % Reg[EAX].u = 0x1;
+381 == 0x1
+382 # just avoid null pointer
+383 8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8  # copy *(EAX+4) to EDX
+384 $error: 0
+385 :(scenarios run)
+386 
+387 :(scenario conflicting_operands_in_modrm_instruction)
+388 % Hide_errors = true;
+389 == 0x1
+390 01/add 0/mod 3/mod
+391 +error: '01/add 0/mod 3/mod' has conflicting mod operands
+392 
+393 :(scenario conflicting_operand_type_modrm)
+394 % Hide_errors = true;
+395 == 0x1
+396 01/add 0/mod 3/rm32/r32
+397 +error: '3/rm32/r32' has conflicting operand types; it should have only one
+398 
+399 :(scenario check_missing_rm32_operand)
+400 % Hide_errors = true;
+401 == 0x1
+402 81 0/add/subop 0/mod            1/imm32
+403 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
+404 
+405 :(scenario check_missing_subop_operand)
+406 % Hide_errors = true;
+407 == 0x1
+408 81             0/mod 3/rm32/ebx 1/imm32
+409 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
+410 
+411 :(scenario check_missing_base_operand)
+412 % Hide_errors = true;
+413 == 0x1
+414 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
+415 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
+416 
+417 :(scenario check_missing_index_operand)
+418 % Hide_errors = true;
+419 == 0x1
+420 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
+421 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
+422 
+423 :(scenario check_missing_base_operand_2)
+424 % Hide_errors = true;
+425 == 0x1
+426 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
+427 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
+428 
+429 :(scenario check_base_operand_not_needed_in_direct_mode)
+430 == 0x1
+431 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32
+432 $error: 0
+433 
+434 //:: similarly handle multi-byte opcodes
+435 
+436 :(code)
+437 void check_operands_0f(const line& inst) {  // Check an instruction whose first opcode byte is 0f: validate the second byte, then its operands.
+438   assert(preprocess_op(inst.words.at(0)).data == "0f");  // compare the normalized opcode, as the dispatching caller does; a raw "0F" or "0x0f" must not trip the assert
+439   if (SIZE(inst.words) == 1) {
+440     raise << "opcode '0f' requires a second opcode\n" << end();
+441     return;
+442   }
+443   word op = preprocess_op(inst.words.at(1));
+444   if (!contains_key(name_0f, op.data)) {
+445     raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
+446     return;
+447   }
+448   check_operands_0f(inst, op);
+449 }
+450 
+451 void check_operands_f3(const line& /*unused*/) {  // Any instruction starting with f3 is rejected with an error.
+452   raise << "no supported opcodes starting with f3\n" << end();
+453 }
+454 
+455 :(scenario check_missing_disp16_operand)
+456 % Hide_errors = true;
+457 == 0x1
+458 # instruction                     effective address                                                   operand     displacement    immediate
+459 # op          subop               mod             rm32          base        index         scale       r32
+460 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+461   0f 84                                                                                                                                             # jmp if ZF to ??
+462 +error: '0f 84' (jump disp16 bytes away if ZF is set): missing disp16 operand
+463 
+464 :(before "End Globals")
+465 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f;
+466 :(before "End Init Permitted Operands")
+467 //// Class C: just op and disp16
+468 //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+469 //  0     0     0      |1       0     0     0
+470 put(Permitted_operands_0f, "84", 0x08);
+471 put(Permitted_operands_0f, "85", 0x08);
+472 put(Permitted_operands_0f, "8c", 0x08);
+473 put(Permitted_operands_0f, "8d", 0x08);
+474 put(Permitted_operands_0f, "8e", 0x08);
+475 put(Permitted_operands_0f, "8f", 0x08);
+476 
+477 //// Class M: using ModR/M byte
+478 //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+479 //  0     0     0      |0       0     0     1
+480 put(Permitted_operands_0f, "af", 0x01);
+481 
+482 :(code)
+483 void check_operands_0f(const line& inst, const word& op) {  // Check a two-byte (0f xx) instruction against its entry in Permitted_operands_0f.
+484   uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);  // NOTE(review): the caller only checked name_0f; presumably every named 0f opcode is registered here too -- confirm
+485   if (HAS(expected_bitvector, MODRM))
+486     check_operands_modrm(inst, op);
+487   compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);  // the modrm bit is dropped before the comparison
+488 }
+489 
+490 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {  // Like compare_bitvector, but error messages take the opcode's name from name_0f.
+491   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
+492   uint8_t bitvector = compute_operand_bitvector(inst);
+493   if (trace_contains_errors()) return;  // duplicate operand type
+494   if (bitvector == expected) return;  // all good with this instruction
+495   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {  // both vectors shift each pass, so bit 0 always belongs to operand type i
+496 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
+497     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
+498     const string& optype = Operand_type_name.at(i);
+499     if ((bitvector & 0x1) > (expected & 0x1))  // 1 > 0: the instruction has it but the opcode doesn't take it
+500       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end();
+501     else
+502       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end();
+503     // continue giving all errors for a single instruction
+504   }
+505   // ignore settings in any unused bits
+506 }
+507 
// Return a lowercased copy of the NUL-terminated string 's'.
// Overloads the <cctype> tolower(int); only the per-character call below uses
// the library version.
std::string tolower(const char* s) {
  std::string result;
  for (/*nada*/;  *s;  ++s) {
    // The <cctype> function is only defined for EOF and values representable
    // as unsigned char. Plain char may be negative, so convert before calling.
    result += static_cast<char>(std::tolower(static_cast<unsigned char>(*s)));
  }
  return result;
}
+514 
+515 #undef HAS
+516 #undef SET
+517 #undef CLEAR
+518 
+519 :(before "End Includes")
+520 #include<cctype>
+

+ + + diff --git a/html/subx/032check_operand_bounds.cc.html b/html/subx/032check_operand_bounds.cc.html new file mode 100644 index 00000000..cc7ea319 --- /dev/null +++ b/html/subx/032check_operand_bounds.cc.html @@ -0,0 +1,121 @@ + + + + +Mu - subx/032check_operand_bounds.cc + + + + + + + + + + +

+ 1 //:: Check that the different operands of an instruction aren't too large for their bitfields.
+ 2 
+ 3 :(scenario check_bitfield_sizes)
+ 4 % Hide_errors = true;
+ 5 == 0x1
+ 6 01/add 4/mod 3/rm32 1/r32  # add ECX to EBX
+ 7 +error: '4/mod' too large to fit in bitfield mod
+ 8 
+ 9 :(before "End Globals")
+10 map<string, uint32_t> Operand_bound;
+11 :(before "End One-time Setup")
+12 put(Operand_bound, "subop", 1<<3);
+13 put(Operand_bound, "mod", 1<<2);
+14 put(Operand_bound, "rm32", 1<<3);
+15 put(Operand_bound, "base", 1<<3);
+16 put(Operand_bound, "index", 1<<3);
+17 put(Operand_bound, "scale", 1<<2);
+18 put(Operand_bound, "r32", 1<<3);
+19 put(Operand_bound, "disp8", 1<<8);
+20 put(Operand_bound, "disp16", 1<<16);
+21 // no bound needed for disp32
+22 put(Operand_bound, "imm8", 1<<8);
+23 // no bound needed for imm32
+24 
+25 :(before "Pack Operands(segment code)")
+26 check_operand_bounds(code);
+27 if (trace_contains_errors()) return;
+28 :(code)
+29 void check_operand_bounds(const segment& code) {  // Bounds-check every operand word of every instruction in 'code'.
+30   trace(99, "transform") << "-- check operand bounds" << end();
+31   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+32     const line& inst = code.lines.at(i);
+33     for (int j = first_operand(inst);  j < SIZE(inst.words);  ++j)  // skip the opcode word(s)
+34       check_operand_bounds(inst.words.at(j));
+35     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
+36   }
+37 }
+38 
+39 void check_operand_bounds(const word& w) {  // Raise an error if w's value doesn't fit a bitfield named by one of its metadata tags.
+40   for (map<string, uint32_t>::iterator p = Operand_bound.begin();  p != Operand_bound.end();  ++p) {
+41     if (!has_metadata(w, p->first)) continue;
+42     if (!is_hex_int(w.data)) continue;  // later transforms are on their own to do their own bounds checking
+43     int32_t x = parse_int(w.data);
+44     if (x >= 0) {
+45       if (static_cast<uint32_t>(x) >= p->second)  // non-negative values must be below the bound (1 << field width)
+46         raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
+47     }
+48     else {
+49       // hacky? assuming bound is a power of 2
+50       if (x < -1*static_cast<int32_t>(p->second/2))  // negative values must be at least -(bound/2)
+51         raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
+52     }
+53   }
+54 }
+

+ + + diff --git a/html/subx/032pack_operands.cc.html b/html/subx/032pack_operands.cc.html deleted file mode 100644 index fd74e1e4..00000000 --- a/html/subx/032pack_operands.cc.html +++ /dev/null @@ -1,268 +0,0 @@ - - - - -Mu - subx/032pack_operands.cc - - - - - - - - - - -

-  1 //: Operands can refer to bitfields smaller than a byte. This layer packs
-  2 //: operands into their containing bytes in the right order.
-  3 
-  4 :(scenario pack_immediate_constants)
-  5 == 0x1
-  6 # instruction                     effective address                                                   operand     displacement    immediate
-  7 # op          subop               mod             rm32          base        index         scale       r32
-  8 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-  9   bb                                                                                                                              0x2a/imm32        # copy 42 to EBX
- 10 +transform: packing instruction 'bb 0x2a/imm32'
- 11 +transform: instruction after packing: 'bb 2a 00 00 00'
- 12 +run: copy imm32 0x0000002a to EBX
- 13 
- 14 :(scenario pack_disp8)
- 15 == 0x1
- 16 74 2/disp8  # jump 2 bytes away if ZF is set
- 17 +transform: packing instruction '74 2/disp8'
- 18 +transform: instruction after packing: '74 02'
- 19 
- 20 :(scenarios transform)
- 21 :(scenario pack_disp8_negative)
- 22 == 0x1
- 23 # running this will cause an infinite loop
- 24 74 -1/disp8  # jump 1 byte before if ZF is set
- 25 +transform: packing instruction '74 -1/disp8'
- 26 +transform: instruction after packing: '74 ff'
- 27 :(scenarios run)
- 28 
- 29 :(scenario pack_modrm_imm32)
- 30 == 0x1
- 31 # instruction                     effective address                                                   operand     displacement    immediate
- 32 # op          subop               mod             rm32          base        index         scale       r32
- 33 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
- 34   81          0/add/subop         3/mod/direct    3/ebx/rm32                                                                      1/imm32           # add 1 to EBX
- 35 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
- 36 +transform: instruction after packing: '81 c3 01 00 00 00'
- 37 
- 38 :(scenario pack_imm32_large)
- 39 == 0x1
- 40 b9 0x080490a7/imm32  # copy to ECX
- 41 +transform: packing instruction 'b9 0x080490a7/imm32'
- 42 +transform: instruction after packing: 'b9 a7 90 04 08'
- 43 
- 44 :(before "End Transforms")
- 45 Transform.push_back(pack_operands);
- 46 
- 47 :(code)
- 48 void pack_operands(program& p) {
- 49   trace(99, "transform") << "-- pack operands" << end();
- 50   if (p.segments.empty()) return;
- 51   segment& code = p.segments.at(0);
- 52   for (int i = 0;  i < SIZE(code.lines);  ++i) {
- 53     line& inst = code.lines.at(i);
- 54     if (all_hex_bytes(inst)) continue;
- 55     trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
- 56     pack_operands(inst);
- 57     trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
- 58   }
- 59 }
- 60 
- 61 void pack_operands(line& inst) {
- 62   line new_inst;
- 63   add_opcodes(inst, new_inst);
- 64   add_modrm_byte(inst, new_inst);
- 65   add_sib_byte(inst, new_inst);
- 66   add_disp_bytes(inst, new_inst);
- 67   add_imm_bytes(inst, new_inst);
- 68   inst.words.swap(new_inst.words);
- 69 }
- 70 
- 71 void add_opcodes(const line& in, line& out) {
- 72   out.words.push_back(in.words.at(0));
- 73   if (in.words.at(0).data == "0f" || in.words.at(0).data == "f3")
- 74     out.words.push_back(in.words.at(1));
- 75   if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
- 76     out.words.push_back(in.words.at(2));
- 77 }
- 78 
- 79 void add_modrm_byte(const line& in, line& out) {
- 80   uint8_t mod=0, reg_subop=0, rm32=0;
- 81   bool emit = false;
- 82   for (int i = 0;  i < SIZE(in.words);  ++i) {
- 83     const word& curr = in.words.at(i);
- 84     if (has_metadata(curr, "mod")) {
- 85       mod = hex_byte(curr.data);
- 86       emit = true;
- 87     }
- 88     else if (has_metadata(curr, "rm32")) {
- 89       rm32 = hex_byte(curr.data);
- 90       emit = true;
- 91     }
- 92     else if (has_metadata(curr, "r32")) {
- 93       reg_subop = hex_byte(curr.data);
- 94       emit = true;
- 95     }
- 96     else if (has_metadata(curr, "subop")) {
- 97       reg_subop = hex_byte(curr.data);
- 98       emit = true;
- 99     }
-100   }
-101   if (emit)
-102     out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
-103 }
-104 
-105 void add_sib_byte(const line& in, line& out) {
-106   uint8_t scale=0, index=0, base=0;
-107   bool emit = false;
-108   for (int i = 0;  i < SIZE(in.words);  ++i) {
-109     const word& curr = in.words.at(i);
-110     if (has_metadata(curr, "scale")) {
-111       scale = hex_byte(curr.data);
-112       emit = true;
-113     }
-114     else if (has_metadata(curr, "index")) {
-115       index = hex_byte(curr.data);
-116       emit = true;
-117     }
-118     else if (has_metadata(curr, "base")) {
-119       base = hex_byte(curr.data);
-120       emit = true;
-121     }
-122   }
-123   if (emit)
-124     out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
-125 }
-126 
-127 void add_disp_bytes(const line& in, line& out) {
-128   for (int i = 0;  i < SIZE(in.words);  ++i) {
-129     const word& curr = in.words.at(i);
-130     if (has_metadata(curr, "disp8"))
-131       emit_hex_bytes(out, curr, 1);
-132     if (has_metadata(curr, "disp16"))
-133       emit_hex_bytes(out, curr, 2);
-134     else if (has_metadata(curr, "disp32"))
-135       emit_hex_bytes(out, curr, 4);
-136   }
-137 }
-138 
-139 void add_imm_bytes(const line& in, line& out) {
-140   for (int i = 0;  i < SIZE(in.words);  ++i) {
-141     const word& curr = in.words.at(i);
-142     if (has_metadata(curr, "imm8"))
-143       emit_hex_bytes(out, curr, 1);
-144     else if (has_metadata(curr, "imm32"))
-145       emit_hex_bytes(out, curr, 4);
-146   }
-147 }
-148 
-149 void emit_hex_bytes(line& out, const word& w, int num) {
-150   assert(num <= 4);
-151   if (!is_hex_int(w.data)) {
-152     out.words.push_back(w);
-153     return;
-154   }
-155   emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
-156 }
-157 
-158 void emit_hex_bytes(line& out, uint32_t val, int num) {
-159   assert(num <= 4);
-160   for (int i = 0;  i < num;  ++i) {
-161     out.words.push_back(hex_byte_text(val & 0xff));
-162     val = val >> 8;
-163   }
-164 }
-165 
-166 word hex_byte_text(uint8_t val) {
-167   ostringstream out;
-168   out << HEXBYTE << NUM(val);
-169   word result;
-170   result.data = out.str();
-171   result.original = out.str()+"/auto";
-172   return result;
-173 }
-174 
-175 string to_string(const vector<word>& in) {
-176   ostringstream out;
-177   for (int i = 0;  i < SIZE(in);  ++i) {
-178     if (i > 0) out << ' ';
-179     out << in.at(i).data;
-180   }
-181   return out.str();
-182 }
-183 
-184 :(scenario pack_immediate_constants_hex)
-185 == 0x1
-186 # instruction                     effective address                                                   operand     displacement    immediate
-187 # op          subop               mod             rm32          base        index         scale       r32
-188 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-189   bb                                                                                                                              0x2a/imm32        # copy 42 to EBX
-190 +transform: packing instruction 'bb 0x2a/imm32'
-191 +transform: instruction after packing: 'bb 2a 00 00 00'
-192 +run: copy imm32 0x0000002a to EBX
-193 
-194 :(scenarios transform)
-195 :(scenario pack_silently_ignores_non_hex)
-196 == 0x1
-197 # instruction                     effective address                                                   operand     displacement    immediate
-198 # op          subop               mod             rm32          base        index         scale       r32
-199 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-200   bb                                                                                                                              foo/imm32         # copy foo to EBX
-201 +transform: packing instruction 'bb foo/imm32'
-202 # no change (we're just not printing metadata to the trace)
-203 +transform: instruction after packing: 'bb foo'
-204 $error: 0
-205 :(scenarios run)
-

- - - diff --git a/html/subx/033non_code_segment.cc.html b/html/subx/033non_code_segment.cc.html index cd4efdd5..f5935cd6 100644 --- a/html/subx/033non_code_segment.cc.html +++ b/html/subx/033non_code_segment.cc.html @@ -69,25 +69,26 @@ if ('onhashchange' in window) { 8 cd 12/imm8 9 +error: 12/imm8: metadata imm8 is only allowed in the (first) code segment 10 -11 :(before "End Transforms") -12 Transform.push_back(ensure_operands_only_in_code_segments); -13 :(code) -14 void ensure_operands_only_in_code_segments(/*const*/ program& p) { -15 trace(99, "transform") << "-- ensure operands only in code segments" << end(); -16 if (p.segments.empty()) return; -17 for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) { -18 const segment& seg = p.segments.at(i); -19 for (int j = 0; j < SIZE(seg.lines); ++j) { -20 const line& l = seg.lines.at(j); -21 for (int k = 0; k < SIZE(l.words); ++k) { -22 const word& w = l.words.at(k); -23 for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) -24 if (has_metadata(w, p->first)) -25 raise << w.original << ": metadata " << p->first << " is only allowed in the (first) code segment\n" << end(); -26 } -27 } -28 } -29 } +11 :(before "Pack Operands(segment code)") +12 ensure_operands_only_in_code_segments(p); +13 if (trace_contains_errors()) return; +14 :(code) +15 void ensure_operands_only_in_code_segments(const program& p) { +16 trace(99, "transform") << "-- ensure operands only in code segments" << end(); +17 if (p.segments.empty()) return; +18 for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) { +19 const segment& seg = p.segments.at(i); +20 for (int j = 0; j < SIZE(seg.lines); ++j) { +21 const line& l = seg.lines.at(j); +22 for (int k = 0; k < SIZE(l.words); ++k) { +23 const word& w = l.words.at(k); +24 for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) +25 if (has_metadata(w, p->first)) +26 raise << w.original << ": metadata " << 
p->first << " is only allowed in the (first) code segment\n" << end(); +27 } +28 } +29 } +30 } diff --git a/html/subx/034discourage_raw_hex.cc.html b/html/subx/034discourage_raw_hex.cc.html new file mode 100644 index 00000000..a6a414af --- /dev/null +++ b/html/subx/034discourage_raw_hex.cc.html @@ -0,0 +1,102 @@ + + + + +Mu - subx/034discourage_raw_hex.cc + + + + + + + + + + +

+ 1 //: Now that we have operand metadata, start warning on instructions that
+ 2 //: don't use it.
+ 3 //:
+ 4 //: While SubX will let you write raw machine code, don't do that unless you
+ 5 //: have a very good reason.
+ 6 
+ 7 :(before "Pack Operands(segment code)")
+ 8 warn_on_raw_hex(code);
+ 9 if (trace_contains_errors()) return;
+10 :(code)
+11 void warn_on_raw_hex(const segment& code) {
+12   trace(99, "transform") << "-- warn on raw hex instructions" << end();
+13   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+14     const line& inst = code.lines.at(i);
+15     if (all_hex_bytes(inst) && has_operands(inst)) {
+16       warn << "'" << to_string(inst) << "': using raw hex is not recommended\n" << end();
+17       break;
+18     }
+19   }
+20 }
+21 
+22 :(scenarios transform)
+23 :(scenario warn_on_hex_bytes_without_operands)
+24 == 0x1
+25 bb 2a 00 00 00  # copy 0x2a (42) to EBX
+26 +warn: 'bb 2a 00 00 00': using raw hex is not recommended
+27 
+28 :(scenario warn_on_non_operand_metadata)
+29 == 0x1
+30 bb 2a 00/foo 00/bar 00  # copy 0x2a (42) to EBX
+31 +warn: 'bb 2a 00/foo 00/bar 00': using raw hex is not recommended
+32 
+33 :(scenario no_warn_on_instructions_without_operands)
+34 == 0x1
+35 55  # push EBP
+36 -warn: '55': using raw hex is not recommended
+

+ + + diff --git a/html/subx/034labels.cc.html b/html/subx/034labels.cc.html deleted file mode 100644 index d3b11f0d..00000000 --- a/html/subx/034labels.cc.html +++ /dev/null @@ -1,226 +0,0 @@ - - - - -Mu - subx/034labels.cc - - - - - - - - - - -

-  1 //: Labels are defined by ending names with a ':'. This layer will compute
-  2 //: addresses for labels, and compute the offset for instructions using them.
-  3 
-  4 :(scenarios transform)
-  5 :(scenario map_label)
-  6 == 0x1
-  7           # instruction                     effective address                                                   operand     displacement    immediate
-  8           # op          subop               mod             rm32          base        index         scale       r32
-  9           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
- 10 loop:
- 11             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
- 12 +transform: label 'loop' is at address 1
- 13 
- 14 :(before "End Transforms")
- 15 Transform.push_back(rewrite_labels);
- 16 
- 17 :(code)
- 18 void rewrite_labels(program& p) {
- 19   trace(99, "transform") << "-- rewrite labels" << end();
- 20   if (p.segments.empty()) return;
- 21   segment& code = p.segments.at(0);
- 22   map<string, int32_t> address;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
- 23   compute_addresses_for_labels(code, address);
- 24   if (trace_contains_errors()) return;
- 25   drop_labels(code);
- 26   if (trace_contains_errors()) return;
- 27   replace_labels_with_addresses(code, address);
- 28 }
- 29 
- 30 void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) {
- 31   int current_byte = 0;
- 32   for (int i = 0;  i < SIZE(code.lines);  ++i) {
- 33     const line& inst = code.lines.at(i);
- 34     for (int j = 0;  j < SIZE(inst.words);  ++j) {
- 35       const word& curr = inst.words.at(j);
- 36       // hack: if we have any operand metadata left after previous transforms,
- 37       // deduce its size
- 38       // Maybe we should just move this transform to before instruction
- 39       // packing, and deduce the size of *all* operands. But then we'll also
- 40       // have to deal with bitfields.
- 41       if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
- 42         if (*curr.data.rbegin() == ':')
- 43           raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
- 44         current_byte += 4;
- 45       }
- 46       // automatically handle /disp8 and /imm8 here
- 47       else if (*curr.data.rbegin() != ':') {
- 48         ++current_byte;
- 49       }
- 50       else {
- 51         if (contains_any_operand_metadata(curr))
- 52           raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
- 53         if (j > 0)
- 54           raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
- 55         string label = curr.data.substr(0, SIZE(curr.data)-1);
- 56         put(address, label, current_byte);
- 57         trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
- 58         // no modifying current_byte; label definitions won't be in the final binary
- 59       }
- 60     }
- 61   }
- 62 }
- 63 
- 64 void drop_labels(segment& code) {
- 65   for (int i = 0;  i < SIZE(code.lines);  ++i) {
- 66     line& inst = code.lines.at(i);
- 67     vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
- 68     inst.words.erase(new_end, inst.words.end());
- 69   }
- 70 }
- 71 
- 72 bool is_label(const word& w) {
- 73   return *w.data.rbegin() == ':';
- 74 }
- 75 
- 76 void replace_labels_with_addresses(segment& code, const map<string, int32_t>& address) {
- 77   int32_t byte_next_instruction_starts_at = 0;
- 78   for (int i = 0;  i < SIZE(code.lines);  ++i) {
- 79     line& inst = code.lines.at(i);
- 80     byte_next_instruction_starts_at += num_bytes(inst);
- 81     line new_inst;
- 82     for (int j = 0;  j < SIZE(inst.words);  ++j) {
- 83       const word& curr = inst.words.at(j);
- 84       if (contains_key(address, curr.data)) {
- 85         int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at;
- 86         if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
- 87           if (offset > 0xff || offset < -0x7f)
- 88             raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end();
- 89           else
- 90             emit_hex_bytes(new_inst, offset, 1);
- 91         }
- 92         else if (has_metadata(curr, "disp16")) {
- 93           if (offset > 0xffff || offset < -0x7fff)
- 94             raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end();
- 95           else
- 96             emit_hex_bytes(new_inst, offset, 2);
- 97         }
- 98         else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
- 99           emit_hex_bytes(new_inst, offset, 4);
-100         }
-101       }
-102       else {
-103         new_inst.words.push_back(curr);
-104       }
-105     }
-106     inst.words.swap(new_inst.words);
-107     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
-108   }
-109 }
-110 
-111 // Assumes all bitfields are packed.
-112 uint32_t num_bytes(const line& inst) {
-113   uint32_t sum = 0;
-114   for (int i = 0;  i < SIZE(inst.words);  ++i) {
-115     const word& curr = inst.words.at(i);
-116     if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32"))  // only multi-byte operands
-117       sum += 4;
-118     else
-119       sum++;
-120   }
-121   return sum;
-122 }
-123 
-124 string data_to_string(const line& inst) {
-125   ostringstream out;
-126   for (int i = 0;  i < SIZE(inst.words);  ++i) {
-127     if (i > 0) out << ' ';
-128     out << inst.words.at(i).data;
-129   }
-130   return out.str();
-131 }
-132 
-133 //: Label definitions must be the first word on a line. No jumping inside
-134 //: instructions.
-135 //: They should also be the only word on a line.
-136 //: However, you can absolutely have multiple labels map to the same address,
-137 //: as long as they're on separate lines.
-138 
-139 :(scenario multiple_labels_at)
-140 == 0x1
-141           # instruction                     effective address                                                   operand     displacement    immediate
-142           # op          subop               mod             rm32          base        index         scale       r32
-143           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-144 # address 1
-145 loop:
-146 loop2:
-147 # address 1 (labels take up no space)
-148             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
-149 # address 6
-150             eb                                                                                                              loop2/disp8
-151 # address 8
-152             eb                                                                                                              loop3/disp8
-153 # address 10
-154 loop3:
-155 +transform: label 'loop' is at address 1
-156 +transform: label 'loop2' is at address 1
-157 +transform: label 'loop3' is at address 10
-158 # first jump is to -7
-159 +transform: instruction after transform: 'eb f9'
-160 # second jump is to 0 (fall through)
-161 +transform: instruction after transform: 'eb 00'
-

- - - diff --git a/html/subx/035label_types.cc.html b/html/subx/035label_types.cc.html deleted file mode 100644 index 1c980ac8..00000000 --- a/html/subx/035label_types.cc.html +++ /dev/null @@ -1,67 +0,0 @@ - - - - -Mu - subx/035label_types.cc - - - - - - - - - - -

-1 //: Distinguish between labels marking the start of a function, and labels
-2 //: inside functions.
-3 //:
-4 //:   Labels within functions start with a '$', and are only permitted in
-5 //:   'jump' instructions.
-6 //:
-7 //:   Labels marking the start of functions lack the '$' sigil, and are only
-8 //:   permitted in 'call' instructions.
-9 
-

- - - diff --git a/html/subx/035labels.cc.html b/html/subx/035labels.cc.html new file mode 100644 index 00000000..107c41bc --- /dev/null +++ b/html/subx/035labels.cc.html @@ -0,0 +1,304 @@ + + + + +Mu - subx/035labels.cc + + + + + + + + + + +

+  1 //: Labels are defined by ending names with a ':'. This layer will compute
+  2 //: addresses for labels, and compute the offset for instructions using them.
+  3 
+  4 //: We're introducing non-number names for the first time, so it's worth
+  5 //: laying down some ground rules all transforms will follow, so things don't
+  6 //: get too confusing:
+  7 //:   - if it starts with a digit, it's treated as a number. If it can't be
+  8 //:     parsed as hex it will raise an error.
+  9 //:   - if it starts with '-' it's treated as a number.
+ 10 //:   - if it starts with '0x' it's treated as a number.
+ 11 //:   - if it's two characters long, it can't be a name. Either it's a hex
+ 12 //:     byte, or it raises an error.
+ 13 //: That's it. Names can start with any non-digit that isn't a dash. They can
+ 14 //: be a single character long. 'a' is not a hex number, it's a variable.
+ 15 //: Later layers may add more conventions partitioning the space of names. But
+ 16 //: the above rules will remain inviolate.
// Classify a word as a number (true) or a name (false):
//   - a leading '-' or a leading digit (which covers '0x') means number
//   - any two-character word is treated as a hex byte, i.e. a number
//   - everything else is a name
// An empty string is not a number; returning false here keeps s.at(0) from
// throwing.
bool is_number(const std::string& s) {
  if (s.empty()) return false;
  // isdigit() is only defined for values representable as unsigned char
  const unsigned char first = static_cast<unsigned char>(s.at(0));
  if (first == '-') return true;
  if (isdigit(first)) return true;
  return s.size() == 2;
}
+ 22 :(before "End Unit Tests")
+ 23 void test_is_number() {
+ 24   CHECK(!is_number("a"));
+ 25 }
+ 26 :(code)
+ 27 void check_valid_name(const string& s) {
+ 28   if (s.empty()) {
+ 29     raise << "empty name!\n" << end();
+ 30     return;
+ 31   }
+ 32   if (s.at(0) == '-')
+ 33     raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
+ 34   if (s.substr(0, 2) == "0x") {
+ 35     raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
+ 36     return;
+ 37   }
+ 38   if (isdigit(s.at(0)))
+ 39     raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
+ 40   if (SIZE(s) == 2)
+ 41     raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
+ 42 }
+ 43 
+ 44 :(scenarios transform)
+ 45 :(scenario map_label)
+ 46 == 0x1
+ 47           # instruction                     effective address                                                   operand     displacement    immediate
+ 48           # op          subop               mod             rm32          base        index         scale       r32
+ 49           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+ 50 loop:
+ 51             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
+ 52 +transform: label 'loop' is at address 1
+ 53 
+ 54 :(before "End Level-2 Transforms")
+ 55 Transform.push_back(rewrite_labels);
+ 56 :(code)
// Transform that resolves labels in the first segment of the program, which
// is treated as the code segment. It runs three passes in order: record a
// byte offset for every label definition, strip the definitions out of the
// instruction stream, then substitute each label reference with a numeric
// offset. Does nothing for a program with no segments, and stops as soon as
// a pass leaves an error in the trace.
void rewrite_labels(program& p) {
  trace(99, "transform") << "-- rewrite labels" << end();
  if (p.segments.empty()) return;
  segment& code = p.segments.at(0);
  // Rewrite Labels(segment code)
  map<string, int32_t> address;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
  compute_addresses_for_labels(code, address);
  if (trace_contains_errors()) return;
  drop_labels(code);
  if (trace_contains_errors()) return;
  // label definitions are gone by now, so only references remain to be replaced
  replace_labels_with_addresses(code, address);
}
+ 69 
+ 70 void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) {
+ 71   int current_byte = 0;
+ 72   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+ 73     const line& inst = code.lines.at(i);
+ 74     for (int j = 0;  j < SIZE(inst.words);  ++j) {
+ 75       const word& curr = inst.words.at(j);
+ 76       // hack: if we have any operand metadata left after previous transforms,
+ 77       // deduce its size
+ 78       // Maybe we should just move this transform to before instruction
+ 79       // packing, and deduce the size of *all* operands. But then we'll also
+ 80       // have to deal with bitfields.
+ 81       if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
+ 82         if (*curr.data.rbegin() == ':')
+ 83           raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
+ 84         current_byte += 4;
+ 85       }
+ 86       // automatically handle /disp8 and /imm8 here
+ 87       else if (*curr.data.rbegin() != ':') {
+ 88         ++current_byte;
+ 89       }
+ 90       else {
+ 91         string label = drop_last(curr.data);
+ 92         // ensure labels look sufficiently different from raw hex
+ 93         check_valid_name(label);
+ 94         if (trace_contains_errors()) return;
+ 95         if (contains_any_operand_metadata(curr))
+ 96           raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
+ 97         if (j > 0)
+ 98           raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
+ 99         put(address, label, current_byte);
+100         trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
+101         // no modifying current_byte; label definitions won't be in the final binary
+102       }
+103     }
+104   }
+105 }
+106 
+107 void drop_labels(segment& code) {
+108   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+109     line& inst = code.lines.at(i);
+110     vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
+111     inst.words.erase(new_end, inst.words.end());
+112   }
+113 }
+114 
+115 bool is_label(const word& w) {
+116   return *w.data.rbegin() == ':';
+117 }
+118 
+119 void replace_labels_with_addresses(segment& code, const map<string, int32_t>& address) {
+120   int32_t byte_next_instruction_starts_at = 0;
+121   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+122     line& inst = code.lines.at(i);
+123     byte_next_instruction_starts_at += num_bytes(inst);
+124     line new_inst;
+125     for (int j = 0;  j < SIZE(inst.words);  ++j) {
+126       const word& curr = inst.words.at(j);
+127       if (contains_key(address, curr.data)) {
+128         int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at;
+129         if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
+130           if (offset > 0xff || offset < -0x7f)
+131             raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end();
+132           else
+133             emit_hex_bytes(new_inst, offset, 1);
+134         }
+135         else if (has_metadata(curr, "disp16")) {
+136           if (offset > 0xffff || offset < -0x7fff)
+137             raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end();
+138           else
+139             emit_hex_bytes(new_inst, offset, 2);
+140         }
+141         else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
+142           emit_hex_bytes(new_inst, offset, 4);
+143         }
+144       }
+145       else {
+146         new_inst.words.push_back(curr);
+147       }
+148     }
+149     inst.words.swap(new_inst.words);
+150     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
+151   }
+152 }
+153 
+154 // Assumes all bitfields are packed.
+155 uint32_t num_bytes(const line& inst) {
+156   uint32_t sum = 0;
+157   for (int i = 0;  i < SIZE(inst.words);  ++i) {
+158     const word& curr = inst.words.at(i);
+159     if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32"))  // only multi-byte operands
+160       sum += 4;
+161     else
+162       sum++;
+163   }
+164   return sum;
+165 }
+166 
+167 string data_to_string(const line& inst) {
+168   ostringstream out;
+169   for (int i = 0;  i < SIZE(inst.words);  ++i) {
+170     if (i > 0) out << ' ';
+171     out << inst.words.at(i).data;
+172   }
+173   return out.str();
+174 }
+175 
// Return 's' without its final character.
// An empty string comes back unchanged; decrementing end() of an empty string
// is undefined.
std::string drop_last(const std::string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
+179 
+180 //: Label definitions must be the first word on a line. No jumping inside
+181 //: instructions.
+182 //: They should also be the only word on a line.
+183 //: However, you can absolutely have multiple labels map to the same address,
+184 //: as long as they're on separate lines.
+185 
+186 :(scenario multiple_labels_at)
+187 == 0x1
+188           # instruction                     effective address                                                   operand     displacement    immediate
+189           # op          subop               mod             rm32          base        index         scale       r32
+190           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+191 # address 1
+192 loop:
+193  $loop2:
+194 # address 1 (labels take up no space)
+195             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
+196 # address 6
+197             eb                                                                                                              $loop2/disp8
+198 # address 8
+199             eb                                                                                                              $loop3/disp8
+200 # address 0xa
+201  $loop3:
+202 +transform: label 'loop' is at address 1
+203 +transform: label '$loop2' is at address 1
+204 +transform: label '$loop3' is at address a
+205 # first jump is to -7
+206 +transform: instruction after transform: 'eb f9'
+207 # second jump is to 0 (fall through)
+208 +transform: instruction after transform: 'eb 00'
+209 
+210 :(scenario label_too_short)
+211 % Hide_errors = true;
+212 == 0x1
+213           # instruction                     effective address                                                   operand     displacement    immediate
+214           # op          subop               mod             rm32          base        index         scale       r32
+215           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+216 xz:
+217             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
+218 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
+219 
+220 :(scenario label_hex)
+221 % Hide_errors = true;
+222 == 0x1
+223           # instruction                     effective address                                                   operand     displacement    immediate
+224           # op          subop               mod             rm32          base        index         scale       r32
+225           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+226 0xab:
+227             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
+228 +error: '0xab' looks like a hex number; use a different name
+229 
+230 :(scenario label_negative_hex)
+231 % Hide_errors = true;
+232 == 0x1
+233           # instruction                     effective address                                                   operand     displacement    immediate
+234           # op          subop               mod             rm32          base        index         scale       r32
+235           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+236  -a:  # indent to avoid looking like a trace_should_not_contain command for this scenario
+237             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
+238 +error: '-a' starts with '-', which can be confused with a negative number; use a different name
+

+ + + diff --git a/html/subx/036recommend_labels.cc.html b/html/subx/036recommend_labels.cc.html new file mode 100644 index 00000000..1aa057b9 --- /dev/null +++ b/html/subx/036recommend_labels.cc.html @@ -0,0 +1,145 @@ + + + + +Mu - subx/036recommend_labels.cc + + + + + + + + + + +

+ 1 //: Now that we have labels, using non-label offsets should be unnecessary.
+ 2 //: While SubX will allow programmers to write raw machine code, that isn't
+ 3 //: *recommended* once we have more ergonomic alternatives.
+ 4 
+ 5 :(scenario warn_on_jump_offset)
+ 6 == 0x1
+ 7 7e/jump-if 1/disp8
+ 8 +warn: '7e/jump-if 1/disp8': using raw offsets for jumps is not recommended; use labels instead
+ 9 
+10 :(scenarios transform)
+11 :(scenario warn_on_call_offset)
+12 == 0x1
+13 e8/call 1/disp32
+14 +warn: 'e8/call 1/disp32': using raw offsets for calls is not recommended; use labels instead
+15 :(scenarios run)
+16 
+17 :(before "Rewrite Labels(segment code)")
+18 recommend_labels(code);
+19 if (trace_contains_errors()) return;
+20 :(code)
+21 void recommend_labels(const segment& code) {  // run the per-line raw-offset check on every line of the code segment
+22   trace(99, "transform") << "-- check for numeric labels" << end();
+23   for (int i = 0;  i < SIZE(code.lines);  ++i)
+24     recommend_labels(code.lines.at(i));  // dispatches to the overload taking a single line
+25 }
+26 
+27 void recommend_labels(const line& inst) {  // warn when a jump or call instruction has a numeric first operand rather than a label
+28   int idx = first_operand(inst);
+29   if (idx >= SIZE(inst.words)) return;  // index is past the last word: nothing to inspect
+30   if (!is_number(inst.words.at(idx).data)) return;  // operand is not a number: no warning needed
+31   if (is_jump(inst))
+32     warn << "'" << inst.original << "': using raw offsets for jumps is not recommended; use labels instead\n" << end();
+33   else if (is_call(inst))
+34     warn << "'" << inst.original << "': using raw offsets for calls is not recommended; use labels instead\n" << end();
+35 }
+36 
+37 bool is_jump(const line& inst) {  // true if inst's opcode is in Jump_opcodes, is 0f followed by a byte in Jump_opcodes_0f, or is ff with subop 4
+38   string op1 = preprocess_op(inst.words.at(0)).data;
+39   if (op1 == "0f") {  // two-byte opcode: the second word decides whether this is a jump
+40     string op2 = preprocess_op(inst.words.at(1)).data;
+41     return Jump_opcodes_0f.find(op2) != Jump_opcodes_0f.end();  // must look up op2; op1 is always "0f" here and is never in the set
+42   }
+43   if (op1 == "ff") return subop(inst) == /*subop for opcode ff*/4;
+44   return Jump_opcodes.find(op1) != Jump_opcodes.end();
+45 }
+46 
+47 bool is_call(const line& inst) {  // true if inst's first word is opcode e8, or opcode ff with subop 2
+48   string op1 = preprocess_op(inst.words.at(0)).data;
+49   if (op1 == "e8") return true;
+50   if (op1 == "ff") return subop(inst) == /*subop for opcode ff*/2;  // opcode ff is shared with other instructions; the subop tells them apart
+51   return false;  // no multi-byte call opcodes
+52 }
+53 
+54 int subop(const line& inst) {  // return bits 3-5 of the numeric value of inst's first operand word
+55   int idx = first_operand(inst);
+56   assert(idx < SIZE(inst.words));  // caller must pass an instruction that has an operand word
+57   return (parse_int(inst.words.at(idx).data)>>3) & 0x7;  // drop the low 3 bits, keep the next 3; presumably the word is a packed modrm byte -- TODO confirm
+58 }
+59 
+60 :(before "End Globals")
+61 set<string> Jump_opcodes;
+62 set<string> Jump_opcodes_0f;
+63 :(before "End One-time Setup")
+64 init_jump_opcodes();
+65 :(code)
+66 void init_jump_opcodes() {  // fill the two global opcode sets that is_jump() consults
+67   Jump_opcodes.insert("74");  // Jump_opcodes: checked against the first opcode word
+68   Jump_opcodes.insert("75");
+69   Jump_opcodes.insert("7c");
+70   Jump_opcodes.insert("7d");
+71   Jump_opcodes.insert("7e");
+72   Jump_opcodes.insert("7f");
+73   Jump_opcodes_0f.insert("84");  // Jump_opcodes_0f: consulted by is_jump() when the first opcode word is 0f
+74   Jump_opcodes_0f.insert("85");
+75   Jump_opcodes_0f.insert("8c");
+76   Jump_opcodes_0f.insert("8d");
+77   Jump_opcodes_0f.insert("8e");
+78   Jump_opcodes_0f.insert("8f");
+79   Jump_opcodes.insert("e9");
+80   Jump_opcodes.insert("eb");
+81 }
+

+ + + diff --git a/html/subx/037label_types.cc.html b/html/subx/037label_types.cc.html new file mode 100644 index 00000000..0437fdb4 --- /dev/null +++ b/html/subx/037label_types.cc.html @@ -0,0 +1,110 @@ + + + + +Mu - subx/037label_types.cc + + + + + + + + + + +

+ 1 //: Distinguish between labels marking the start of a function, and labels
+ 2 //: inside functions.
+ 3 //:
+ 4 //: - Labels within functions start with a '$', and are only permitted in
+ 5 //:   'jump' instructions.
+ 6 //:
+ 7 //: - Labels marking the start of functions lack the '$' sigil, and are only
+ 8 //:   permitted in 'call' instructions.
+ 9 
+10 :(before "Rewrite Labels(segment code)")
+11 check_label_types(code);
+12 if (trace_contains_errors()) return;
+13 :(code)
+14 void check_label_types(const segment& code) {  // run the per-line label-type check on every line of the code segment
+15   trace(99, "transform") << "-- check label types" << end();
+16   for (int i = 0;  i < SIZE(code.lines);  ++i)
+17     check_label_types(code.lines.at(i));  // dispatches to the overload taking a single line
+18 }
+19 
+20 void check_label_types(const line& inst) {  // raise an error if a jump targets a non-'$' label or a call targets a '$' label
+21   int idx = first_operand(inst);
+22   if (idx >= SIZE(inst.words)) return;  // index is past the last word: nothing to check
+23   const word& target = inst.words.at(idx);
+24   if (is_number(target.data)) return;  // handled elsewhere
+25   if (is_jump(inst) && target.data.at(0) != '$')  // .at(0) throws on an empty word; assumes operand words are non-empty -- TODO confirm
+26     raise << "'" << inst.original << "': jumps should always be to internal labels starting with '$'\n" << end();
+27   if (is_call(inst) && target.data.at(0) == '$')
+28     raise << "'" << inst.original << "': calls should always be to function labels (not starting with '$')\n" << end();
+29 }
+30 
+31 :(scenario catch_jump_to_function)
+32 % Hide_errors = true;
+33 == 0x1
+34 main:
+35 7e/jump-if foo/disp8
+36 foo:
+37 +error: '7e/jump-if foo/disp8': jumps should always be to internal labels starting with '$'
+38 
+39 :(scenario catch_call_to_internal_label)
+40 % Hide_errors = true;
+41 == 0x1
+42 main:
+43 e8/call $foo/disp32
+44  $foo:  # indent to avoid looking like a trace_count command for this scenario
+45 +error: 'e8/call $foo/disp32': calls should always be to function labels (not starting with '$')
+

+ + + diff --git a/html/subx/038check_local_jumps.cc.html b/html/subx/038check_local_jumps.cc.html new file mode 100644 index 00000000..a37fe3f9 --- /dev/null +++ b/html/subx/038check_local_jumps.cc.html @@ -0,0 +1,125 @@ + + + + +Mu - subx/038check_local_jumps.cc + + + + + + + + + + +

+ 1 //: Make sure that we never jump from one function to within another.
+ 2 //:
+ 3 //: (The check for label types already ensures we can't jump to the start of
+ 4 //: another function.)
+ 5 
+ 6 :(scenario jump_to_different_function)
+ 7 % Hide_errors = true;
+ 8 == 0x1
+ 9 fn1:
+10   7e/jump-if $target/disp8
+11 fn2:
+12  $target:
+13 +error: '7e/jump-if $target/disp8' in function 'fn1': jump to within another function 'fn2' is a *really* bad idea
+14 
+15 :(before "Rewrite Labels(segment code)")
+16 check_local_jumps(code);
+17 if (trace_contains_errors()) return;
+18 :(code)
+19 void check_local_jumps(const segment& code) {  // raise an error on the first jump whose '$' target is recorded under a different function than the jump itself
+20   map</*jump target*/string, /*containing call target*/string> function;
+21   compute_function_target(code, function);  // first pass: record which function each '$' label belongs to
+22   if (trace_contains_errors()) return;
+23   string current_function;  // stays empty until the first non-'$' label is seen
+24   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+25     const line& inst = code.lines.at(i);
+26     if (SIZE(inst.words) == 1 && is_label(inst.words.at(0))) {
+27       // label definition
+28       if (inst.words.at(0).data.at(0) != '$')
+29         current_function = drop_last(inst.words.at(0).data);  // same drop_last() normalization as compute_function_target, so names compare equal
+30     }
+31     else if (is_jump(inst)) {  // NOTE(review): is_jump reads inst.words.at(0); assumes code.lines never holds a line with zero words -- confirm
+32       const word& target = inst.words.at(first_operand(inst));  // NOTE(review): no idx >= SIZE(inst.words) guard here, unlike recommend_labels/check_label_types -- confirm earlier passes guarantee an operand
+33       if (!contains_key(function, target.data)) continue;  // error/warning handled elsewhere
+34       if (get(function, target.data) == current_function) continue;  // target lives in the same function: fine
+35       raise << "'" << to_string(inst) << "' in function '" << current_function << "': jump to within another function '" << get(function, target.data) << "' is a *really* bad idea\n" << end();
+36       return;  // stop after the first offending jump
+37     }
+38   }
+39 }
+40 
+41 void compute_function_target(const segment& code, map<string, string>& out) {  // fill 'out' with '$' label -> most recent preceding non-'$' label
+42   string current_function;
+43   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+44     const line& inst = code.lines.at(i);
+45     if (SIZE(inst.words) != 1) continue;  // only single-word lines are considered as label definitions
+46     const word& curr = inst.words.at(0);
+47     if (!is_label(curr)) continue;
+48     const string& label = drop_last(curr.data);  // presumably strips the trailing ':' of the definition -- TODO confirm
+49     if (label.at(0) != '$') {  // no '$' sigil: this label starts a new function
+50       current_function = label;
+51       continue;
+52     }
+53     if (contains_key(out, label)) {  // the same '$' label was already recorded
+54       raise << "duplicate label '" << label << "'\n" << end();
+55       return;  // stop at the first duplicate
+56     }
+57     // current_function can be empty! if so that would be 'main'.
+58     put(out, label, current_function);
+59   }
+60 }
+

+ + + diff --git a/html/subx/ex3.subx.html b/html/subx/ex3.subx.html index c3dddcfb..56cde00c 100644 --- a/html/subx/ex3.subx.html +++ b/html/subx/ex3.subx.html @@ -71,18 +71,18 @@ if ('onhashchange' in window) { 16 # counter: ECX = 1 17 b9/copy 1/imm32 # copy 1 to ECX 18 -19 loop: +19 $loop: 20 # while (counter <= 10) 21 81 7/subop/compare 3/mod/direct 1/rm32/ecx 0xa/imm32 # compare ECX, 10/imm -22 7f/jump-if exit/disp8 # jump-if-greater exit +22 7f/jump-if $exit/disp8 # jump-if-greater $exit 23 # result += counter 24 01/add 3/mod/direct 3/rm32/ebx 1/r32/ecx # add ECX to EBX 25 # ++counter 26 81 0/subop/add 3/mod/direct 1/rm32/ecx 1/imm32 # add 1 to ECX 27 # loop -28 eb/jump loop/disp8 # jump loop +28 eb/jump $loop/disp8 # jump $loop 29 -30 exit: +30 $exit: 31 # exit(EBX) 32 b8/copy 1/imm32 # copy 1 to EAX 33 cd/syscall 0x80/imm8 # int 80h diff --git a/html/subx/ex4.subx.html b/html/subx/ex4.subx.html index 609fca41..3b8fc4cf 100644 --- a/html/subx/ex4.subx.html +++ b/html/subx/ex4.subx.html @@ -90,7 +90,7 @@ if ('onhashchange' in window) { 35 b8/copy 1/imm32 # copy 1 to EAX 36 cd/syscall 0x80/imm8 # int 80h 37 -38 == 0x080490a7 +38 == 0x080490a7 # data segment 39 00 00 00 00 # space for read() to write to 40 41 # vim:ft=subx:nowrap diff --git a/html/subx/ex6.subx.html b/html/subx/ex6.subx.html index f79324f6..7b4238e9 100644 --- a/html/subx/ex6.subx.html +++ b/html/subx/ex6.subx.html @@ -81,12 +81,12 @@ if ('onhashchange' in window) { 26 b8/copy 1/imm32 # copy 1 to EAX 27 cd/syscall 0x80/imm8 # int 80h 28 -29 == 0x08049093 +29 == 0x08049093 # data segment 30 # size of string 31 0e 00 00 00 32 # string -33 48 65 6c 6c 6f 2c 20 77 6f 72 6c 64 21 0a -34 # h e l l o , ␣ w o r l d ! newline +33 48 65 6c 6c 6f 2c 20 77 6f 72 6c 64 21 0a 00 +34 # h e l l o , ␣ w o r l d ! 
newline null 35 36 # vim:ft=subx:nowrap diff --git a/html/subx/ex7.subx.html b/html/subx/ex7.subx.html index 33e56e2a..baa9076f 100644 --- a/html/subx/ex7.subx.html +++ b/html/subx/ex7.subx.html @@ -94,7 +94,7 @@ if ('onhashchange' in window) { 38 b8/copy . . . . . . . 1/imm32 # copy 1 to EAX 39 # if (n <= 1) jump exit 40 81 7/subop/compare 3/mod/direct 2/rm32/EDX . . . . . 1/imm32 # compare EDX with 1 -41 7e/jump-if . . . . . . factorial:exit/disp8 # jump if <= to exit +41 7e/jump-if-<= . . . . . . $factorial:exit/disp8 # jump if <= to $factorial:exit 42 # EBX: n-1 43 89/copy 3/mod/direct 3/rm32/EBX . . . 2/r32/EDX . . # copy EDX to EBX 44 81 5/subop/subtract 3/mod/direct 3/rm32/EBX . . . . . 1/imm32 # subtract 1 from EBX @@ -114,7 +114,7 @@ if ('onhashchange' in window) { 58 # return n * factorial(n-1) 59 0f af/multiply 3/mod/direct 2/rm32/EDX . . . 0/r32/EAX . . # multiply EDX (n) into EAX (factorial(n-1)) 60 # TODO: check for overflow -61 factorial:exit: +61 $factorial:exit: 62 c3/return 63 64 # vim:ft=subx:nowrap:so=0 diff --git a/html/subx/ex8.subx.html b/html/subx/ex8.subx.html new file mode 100644 index 00000000..96bfb430 --- /dev/null +++ b/html/subx/ex8.subx.html @@ -0,0 +1,159 @@ + + + + +Mu - subx/ex8.subx + + + + + + + + + + +

+ 1 ## example showing file syscalls
+ 2 # Create a file, open it for writing, write a character to it, close it, open
+ 3 # it for reading, read a character from it, close it, delete it, and return
+ 4 # the character read.
+ 5 #
+ 6 # To run:
+ 7 #   $ subx translate ex8.subx ex8
+ 8 #   $ subx run ex8
+ 9 # Expected result:
+10 #   $ echo $?
+11 #   97
+12 
+13 == 0x08048074  # code segment, after leaving room for ELF header and segment headers
+14 # instruction                     effective address                                                   operand     displacement    immediate
+15 # op          subop               mod             rm32          base        index         scale       r32
+16 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+17 
+18   ## creat(filename)
+19   bb/copy                         .               .             .           .             .           .           .               0x08049131/imm32/fname  # copy to EBX
+20   b9/copy                         .               .             .           .             .           .           .               0x180/imm32/fixed-perms # copy 0x180 to ECX
+21   b8/copy                         .               .             .           .             .           .           .               8/imm32/creat           # copy 8 to EAX
+22   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+23 
+24   ## fd = open(filename, O_WRONLY, 0)
+25   bb/copy                         .               .             .           .             .           .           .               0x08049131/imm32/fname  # copy to EBX
+26   b9/copy                         .               .             .           .             .           .           .               1/imm32/wronly          # copy 1 to ECX
+27   ba/copy                         .               .             .           .             .           .           .               0x180/imm32/fixed-perms # copy 0x180 to EDX
+28   b8/copy                         .               .             .           .             .           .           .               5/imm32/open            # copy 5 to EAX
+29   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+30   # save fd
+31   bb/copy                         .               .             .           .             .           .           .               0x08049125/imm32/fd     # copy to EBX
+32   89/copy                         0/mod/indirect  3/rm32/EBX                                          0/r32/EAX                                           # copy EAX to *EBX
+33 
+34   ## write(fd, "a", 1)
+35   # load fd
+36   bb/copy                         .               .             .           .             .           .           .               0x08049125/imm32/fd     # copy to EBX
+37   8b/copy                         0/mod/indirect  3/rm32/EBX                                          3/r32/EBX                                           # copy *EBX to EBX
+38   #
+39   b9/copy                         .               .             .           .             .           .           .               0x08049129/imm32/a      # copy to ECX
+40   ba/copy                         .               .             .           .             .           .           .               1/imm32/size            # copy 1 to EDX
+41   b8/copy                         .               .             .           .             .           .           .               4/imm32/write           # copy 4 to EAX
+42   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+43 
+44   ## close(fd)
+45   # load fd
+46   bb/copy                         .               .             .           .             .           .           .               0x08049125/imm32/fd     # copy to EBX
+47   8b/copy                         0/mod/indirect  3/rm32/EBX                                          3/r32/EBX                                           # copy *EBX to EBX
+48   #
+49   b8/copy                         .               .             .           .             .           .           .               6/imm32/close           # copy 6 to EAX
+50   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+51 
+52   ## fd = open(filename, O_RDONLY, 0)
+53   bb/copy                         .               .             .           .             .           .           .               0x08049131/imm32/fname  # copy to EBX
+54   b9/copy                         .               .             .           .             .           .           .               0/imm32/rdonly          # copy 0 to ECX
+55   ba/copy                         .               .             .           .             .           .           .               0x180/imm32/fixed-perms # copy 0x180 to EDX
+56   b8/copy                         .               .             .           .             .           .           .               5/imm32/open            # copy 5 to EAX
+57   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+58   # save fd
+59   bb/copy                         .               .             .           .             .           .           .               0x08049125/imm32/fd     # copy to EBX
+60   89/copy                         0/mod/indirect  3/rm32/EBX                                          0/r32/EAX                                           # copy EAX to *EBX
+61 
+62   ## read(fd, b, 1)
+63   # load fd
+64   bb/copy                         .               .             .           .             .           .           .               0x08049125/imm32/fd     # copy to EBX
+65   8b/copy                         0/mod/indirect  3/rm32/EBX                                          3/r32/EBX                                           # copy *EBX to EBX
+66   #
+67   b9/copy                         .               .             .           .             .           .           .               0x0804912d/imm32/b      # copy to ECX
+68   ba/copy                         .               .             .           .             .           .           .               1/imm32/size            # copy 1 to EDX
+69   b8/copy                         .               .             .           .             .           .           .               3/imm32/read            # copy 3 to EAX
+70   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+71 
+72   ## close(fd)
+73   # load fd
+74   bb/copy                         .               .             .           .             .           .           .               0x08049125/imm32/fd     # copy to EBX
+75   8b/copy                         0/mod/indirect  3/rm32/EBX                                          3/r32/EBX                                           # copy *EBX to EBX
+76   #
+77   b8/copy                         .               .             .           .             .           .           .               6/imm32/close           # copy 6 to EAX
+78   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+79 
+80   ## unlink(filename)
+81   bb/copy                         .               .             .           .             .           .           .               0x08049131/imm32/fname  # copy to EBX
+82   b8/copy                         .               .             .           .             .           .           .               0xa/imm32/unlink        # copy 0xa to EAX
+83   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+84 
+85   ## exit(b)
+86   # load b
+87   bb                              .               .             .           .             .           .           .               0x0804912d/imm32/b      # copy to EBX
+88   8b/copy                         0/mod/indirect  3/rm32/EBX                                          3/r32/EBX                                           # copy *EBX to EBX
+89   #
+90   b8/copy                         .               .             .           .             .           .           .               1/imm32/exit            # copy 1 to EAX
+91   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8               # int 80h
+92 
+93 == 0x08049125  # data segment
+94 00 00 00 00  # fd
+95 61 00 00 00  # a: string to write to file: 'a'
+96 00 00 00 00  # b: space for string read from file
+97 2e 66 6f 6f 00 00 00 00  # filename: '.foo'
+98 
+99 # vim:ft=subx:nowrap:tw&
+

+ + + diff --git a/html/subx/x.subx.html b/html/subx/x.subx.html deleted file mode 100644 index 373298c2..00000000 --- a/html/subx/x.subx.html +++ /dev/null @@ -1,124 +0,0 @@ - - - - -Mu - subx/x.subx - - - - - - - - - - -

- 1 ## compute the factorial of 5, and return the result in the exit code
- 2 #
- 3 # To run:
- 4 #   $ subx translate ex7.subx ex7
- 5 #   $ subx run ex7
- 6 # Expected result:
- 7 #   $ echo $?
- 8 #   120
- 9 
-10 == 0x08048054  # code segment, after leaving room for ELF header
-11 # instruction                     effective address                                                   operand     displacement    immediate
-12 # op          subop               mod             rm32          base        index         scale       r32
-13 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-14 
-15 # main:
-16   # prepare to make a call
-17   55/push                         .               .             .           .             .           .           .               .                 # push EBP
-18   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
-19   # factorial(5)
-20   68/push                         .               .             .           .             .           .           .               5/imm32           # push 5
-21   e8/call                         .               .             .           .             .           .           factorial/disp32
-22   # discard arg
-23   5a/pop                          .               .             .           .             .           .           .               .                 # pop into EDX
-24   # clean up after call
-25   89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
-26   5d/pop                          .               .             .           .             .           .           .               .                 # pop to EBP
-27 
-28   # exit(EAX)
-29   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                 # copy EAX to EBX
-30   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EAX
-31   cd/syscall                      .               .             .           .             .           .           .               0x80/imm8         # int 80h
-32 
-33 # factorial(n)
-34 factorial:
-35   # initialize n
-36   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              2/r32/EDX   4/disp8         .                 # copy *(ESP+4) to EDX
-37   # initialize EAX to 1 (base case)
-38   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EAX
-39   # if (n <= 1) jump exit
-40   81          7/subop/compare     3/mod/direct    2/rm32/EDX    .           .             .           .           .               1/imm32           # compare EDX with 1
-41   7e/jump-if                      .               .             .           .             .           .           factorial:exit/disp8              # jump if <= to exit
-42   # EBX: n-1
-43   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           2/r32/EDX   .               .                 # copy EDX to EBX
-44   81          5/subop/subtract    3/mod/direct    3/rm32/EBX    .           .             .           .           .               1/imm32           # subtract 1 from EBX
-45   # prepare call
-46   55/push                         .               .             .           .             .           .           .               .                 # push EBP
-47   89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
-48   # EAX: factorial(n-1)
-49   53/push                         .               .             .           .             .           .           .               .                 # push EBX
-50   e8/call                         .               .             .           .             .           .           factorial/disp32
-51   # discard arg
-52   5e/pop                          .               .             .           .             .           .           .               .                 # pop into ESI
-53   # clean up after call
-54   89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
-55   5d/pop                          .               .             .           .             .           .           .               .                 # pop to EBP
-56   # refresh n
-57   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              2/r32/EDX   4/disp8         .                 # copy *(ESP+4) to EDX
-58   # return n * factorial(n-1)
-59   0f af/multiply                  3/mod/direct    2/rm32/EDX    .           .             .           0/r32/EAX   .               .                 # multiply EDX (n) into EAX (factorial(n-1))
-60   # TODO: check for overflow
-61 factorial:exit:
-62   c3/return
-63 
-64 # vim:ft=subx:nowrap:so=0
-

- - - -- cgit 1.4.1-2-gfad0