From 37d53a70958bfe5b1d7946229af9c12f0b865abc Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sun, 23 Sep 2018 22:38:16 -0700 Subject: 4512 --- html/subx/000organization.cc.html | 2 +- html/subx/003trace.cc.html | 66 +- html/subx/011run.cc.html | 413 +++++----- html/subx/013direct_addressing.cc.html | 1080 +++++++++++++------------- html/subx/014indirect_addressing.cc.html | 57 +- html/subx/015immediate_addressing.cc.html | 20 +- html/subx/016index_addressing.cc.html | 6 +- html/subx/019functions.cc.html | 4 +- html/subx/028translate.cc.html | 4 +- html/subx/030---operands.cc.html | 26 +- html/subx/031check_operands.cc.html | 650 ++++++++-------- html/subx/032check_operand_bounds.cc.html | 4 +- html/subx/034compute_segment_address.cc.html | 2 +- html/subx/035labels.cc.html | 278 +++---- html/subx/036global_variables.cc.html | 10 +- html/subx/038---literal_strings.cc.html | 281 +++++++ html/subx/040---tests.cc.html | 190 ++--- html/subx/apps/crenshaw2-1.subx.html | 272 ++++--- html/subx/apps/factorial.subx.html | 372 ++++----- html/subx/examples/ex10.subx.html | 4 +- html/subx/examples/ex11.subx.html | 364 +++++++++ html/subx/examples/ex5.subx.html | 64 +- html/subx/examples/ex8.subx.html | 70 +- html/subx/examples/ex9.subx.html | 6 +- 24 files changed, 2497 insertions(+), 1748 deletions(-) create mode 100644 html/subx/038---literal_strings.cc.html create mode 100644 html/subx/examples/ex11.subx.html (limited to 'html/subx') diff --git a/html/subx/000organization.cc.html b/html/subx/000organization.cc.html index 10d753e9..e3e2abb4 100644 --- a/html/subx/000organization.cc.html +++ b/html/subx/000organization.cc.html @@ -169,7 +169,7 @@ if ('onhashchange' in window) { 110 // 111 // End Globals 112 -113 int main(int argc, char* argv[]) { +113 int main(int argc, char* argv[]) { 114 atexit(reset); 115 // we require a 32-bit little-endian system 116 assert(sizeof(int) == 4); diff --git a/html/subx/003trace.cc.html b/html/subx/003trace.cc.html index 6abc7086..9e3c7c6c 
100644 --- a/html/subx/003trace.cc.html +++ b/html/subx/003trace.cc.html @@ -129,10 +129,10 @@ if ('onhashchange' in window) { 70 :(before "End Types") 71 struct trace_line { 72 int depth; // optional field just to help browse traces later - 73 string label; + 73 string label; 74 string contents; - 75 trace_line(string l, string c) :depth(0), label(l), contents(c) {} - 76 trace_line(int d, string l, string c) :depth(d), label(l), contents(c) {} + 75 trace_line(string l, string c) :depth(0), label(l), contents(c) {} + 76 trace_line(int d, string l, string c) :depth(d), label(l), contents(c) {} 77 }; 78 79 //: Support for tracing an entire run. @@ -176,14 +176,14 @@ if ('onhashchange' in window) { 117 trace_stream() :curr_stream(NULL), curr_depth(Max_depth), collect_depth(Max_depth) {} 118 ~trace_stream() { if (curr_stream) delete curr_stream; } 119 -120 ostream& stream(string label) { -121 return stream(Max_depth, label); +120 ostream& stream(string label) { +121 return stream(Max_depth, label); 122 } 123 -124 ostream& stream(int depth, string label) { +124 ostream& stream(int depth, string label) { 125 if (depth > collect_depth) return null_stream; 126 curr_stream = new ostringstream; -127 curr_label = label; +127 curr_label = label; 128 curr_depth = depth; 129 (*curr_stream) << std::hex; 130 return *curr_stream; @@ -199,7 +199,7 @@ if ('onhashchange' in window) { 140 // be sure to call this before messing with curr_stream or curr_label 141 void newline(); 142 // useful for debugging -143 string readable_contents(string label); // empty label = show everything +143 string readable_contents(string label); // empty label = show everything 144 }; 145 146 :(code) @@ -220,12 +220,12 @@ if ('onhashchange' in window) { 161 curr_depth = Max_depth; 162 } 163 -164 string trace_stream::readable_contents(string label) { +164 string trace_stream::readable_contents(string label) { 165 ostringstream output; -166 label = trim(label); +166 label = trim(label); 167 for 
(vector<trace_line>::iterator p = past_lines.begin(); p != past_lines.end(); ++p) -168 if (label.empty() || label == p->label) { -169 output << std::setw(4) << p->depth << ' ' << p->label << ": " << p->contents << '\n'; +168 if (label.empty() || label == p->label) { +169 output << std::setw(4) << p->depth << ' ' << p->label << ": " << p->contents << '\n'; 170 } 171 return output.str(); 172 } @@ -318,10 +318,10 @@ if ('onhashchange' in window) { 259 } 260 261 #define CHECK_TRACE_COUNT(label, count) \ -262 if (Passed && trace_count(label) != (count)) { \ -263 cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): trace_count of " << label << " should be " << count << '\n'; \ -264 cerr << " got " << trace_count(label) << '\n'; /* multiple eval */ \ -265 DUMP(label); \ +262 if (Passed && trace_count(label) != (count)) { \ +263 cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): trace_count of " << label << " should be " << count << '\n'; \ +264 cerr << " got " << trace_count(label) << '\n'; /* multiple eval */ \ +265 DUMP(label); \ 266 Passed = false; \ 267 return; /* Currently we stop at the very first failure. 
*/ \ 268 } @@ -337,10 +337,10 @@ if ('onhashchange' in window) { 278 while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty()) 279 ++curr_expected_line; 280 if (curr_expected_line == SIZE(expected_lines)) return true; -281 string label, contents; +281 string label, contents; 282 split_label_contents(expected_lines.at(curr_expected_line), &label, &contents); 283 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { -284 if (label != p->label) continue; +284 if (label != p->label) continue; 285 if (contents != trim(p->contents)) continue; 286 ++curr_expected_line; 287 while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty()) @@ -349,19 +349,19 @@ if ('onhashchange' in window) { 290 split_label_contents(expected_lines.at(curr_expected_line), &label, &contents); 291 } 292 -293 if (line_exists_anywhere(label, contents)) { -294 cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): line [" << label << ": " << contents << "] out of order in trace:\n"; +293 if (line_exists_anywhere(label, contents)) { +294 cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): line [" << label << ": " << contents << "] out of order in trace:\n"; 295 DUMP(""); 296 } 297 else { 298 cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << contents << "] in trace:\n"; -299 DUMP(label); +299 DUMP(label); 300 } 301 Passed = false; 302 return false; 303 } 304 -305 void split_label_contents(const string& s, string* label, string* contents) { +305 void split_label_contents(const string& s, string* label, string* contents) { 306 static const string delim(": "); 307 size_t pos = s.find(delim); 308 if (pos == string::npos) { @@ -374,23 +374,23 @@ if ('onhashchange' in window) { 315 } 316 } 317 -318 bool line_exists_anywhere(const string& label, const string& contents) { +318 bool line_exists_anywhere(const string& 
label, const string& contents) { 319 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { -320 if (label != p->label) continue; +320 if (label != p->label) continue; 321 if (contents == trim(p->contents)) return true; 322 } 323 return false; 324 } 325 -326 int trace_count(string label) { -327 return trace_count(label, ""); +326 int trace_count(string label) { +327 return trace_count(label, ""); 328 } 329 -330 int trace_count(string label, string line) { +330 int trace_count(string label, string line) { 331 if (!Trace_stream) return 0; 332 long result = 0; 333 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { -334 if (label == p->label) { +334 if (label == p->label) { 335 if (line == "" || trim(line) == trim(p->contents)) 336 ++result; 337 } @@ -398,11 +398,11 @@ if ('onhashchange' in window) { 339 return result; 340 } 341 -342 int trace_count_prefix(string label, string prefix) { +342 int trace_count_prefix(string label, string prefix) { 343 if (!Trace_stream) return 0; 344 long result = 0; 345 for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { -346 if (label == p->label) { +346 if (label == p->label) { 347 if (starts_with(trim(p->contents), trim(prefix))) 348 ++result; 349 } @@ -410,14 +410,14 @@ if ('onhashchange' in window) { 351 return result; 352 } 353 -354 bool trace_doesnt_contain(string label, string line) { -355 return trace_count(label, line) == 0; +354 bool trace_doesnt_contain(string label, string line) { +355 return trace_count(label, line) == 0; 356 } 357 358 bool trace_doesnt_contain(string expected) { 359 vector<string> tmp = split_first(expected, ": "); 360 if (SIZE(tmp) == 1) { -361 raise << expected << ": missing label or contents in trace line\n" << end(); +361 raise << expected << ": missing label or contents in trace line\n" << end(); 362 assert(false); 
363 } 364 return trace_doesnt_contain(tmp.at(0), tmp.at(1)); diff --git a/html/subx/011run.cc.html b/html/subx/011run.cc.html index ef265021..0428490f 100644 --- a/html/subx/011run.cc.html +++ b/html/subx/011run.cc.html @@ -148,9 +148,9 @@ if ('onhashchange' in window) { 85 istringstream in(text_bytes); 86 parse(in, p); 87 if (trace_contains_errors()) return; // if any stage raises errors, stop immediately - 88 transform(p); + 88 transform(p); 89 if (trace_contains_errors()) return; - 90 load(p); + 90 load(p); 91 if (trace_contains_errors()) return; 92 while (EIP < End_of_program) 93 run_one_instruction(); @@ -201,206 +201,219 @@ if ('onhashchange' in window) { 138 getline(fin, line_data); 139 curr.original = line_data; 140 trace(99, "parse") << "line: " << line_data << end(); -141 istringstream lin(line_data); -142 while (has_data(lin)) { -143 string word_data; -144 lin >> word_data; -145 if (word_data.empty()) continue; -146 if (word_data[0] == '#') break; // comment -147 if (word_data == ".") continue; // comment token -148 if (word_data == "==") { -149 if (!l.empty()) { -150 assert(!out.segments.empty()); -151 trace(99, "parse") << "flushing to segment" << end(); -152 out.segments.back().lines.swap(l); -153 } -154 segment s; -155 string segment_title; -156 lin >> segment_title; -157 if (starts_with(segment_title, "0x")) -158 s.start = parse_int(segment_title); -159 trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); -160 out.segments.push_back(s); -161 // todo? 
-162 break; // skip rest of line -163 } -164 if (word_data[0] == ':') { -165 // todo: line metadata -166 break; -167 } -168 curr.words.push_back(word()); -169 curr.words.back().original = word_data; -170 istringstream win(word_data); -171 if (getline(win, curr.words.back().data, '/')) { -172 string m; -173 while (getline(win, m, '/')) -174 curr.words.back().metadata.push_back(m); -175 } -176 trace(99, "parse") << "new word: " << curr.words.back().data << end(); -177 } -178 if (!curr.words.empty()) -179 l.push_back(curr); +141 // End Line Parsing Special-cases(line_data -> l) +142 istringstream lin(line_data); +143 while (has_data(lin)) { +144 string word_data; +145 lin >> word_data; +146 if (word_data.empty()) continue; +147 if (word_data[0] == '#') break; // comment +148 if (word_data == ".") continue; // comment token +149 if (word_data == "==") { +150 if (!l.empty()) { +151 assert(!out.segments.empty()); +152 trace(99, "parse") << "flushing to segment" << end(); +153 out.segments.back().lines.swap(l); +154 } +155 segment s; +156 string segment_title; +157 lin >> segment_title; +158 if (starts_with(segment_title, "0x")) +159 s.start = parse_int(segment_title); +160 trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); +161 out.segments.push_back(s); +162 // todo? 
+163 break; // skip rest of line +164 } +165 if (word_data[0] == ':') { +166 // todo: line metadata +167 break; +168 } +169 curr.words.push_back(word()); +170 parse_word(word_data, curr.words.back()); +171 trace(99, "parse") << "word: " << to_string(curr.words.back()); +172 } +173 if (!curr.words.empty()) +174 l.push_back(curr); +175 } +176 if (!l.empty()) { +177 assert(!out.segments.empty()); +178 trace(99, "parse") << "flushing to segment" << end(); +179 out.segments.back().lines.swap(l); 180 } -181 if (!l.empty()) { -182 assert(!out.segments.empty()); -183 trace(99, "parse") << "flushing to segment" << end(); -184 out.segments.back().lines.swap(l); -185 } -186 trace(99, "parse") << "done" << end(); -187 } -188 -189 //:: transform -190 -191 :(before "End Types") -192 typedef void (*transform_fn)(program&); -193 :(before "End Globals") -194 vector<transform_fn> Transform; -195 -196 void transform(program& p) { -197 trace(99, "transform") << "begin" << end(); -198 for (int t = 0; t < SIZE(Transform); ++t) -199 (*Transform.at(t))(p); -200 trace(99, "transform") << "done" << end(); -201 } -202 -203 //:: load -204 -205 void load(const program& p) { -206 trace(99, "load") << "begin" << end(); -207 if (p.segments.empty()) { -208 raise << "no code to run\n" << end(); -209 return; -210 } -211 for (int i = 0; i < SIZE(p.segments); ++i) { -212 const segment& seg = p.segments.at(i); -213 uint32_t addr = seg.start; -214 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); -215 for (int j = 0; j < SIZE(seg.lines); ++j) { -216 const line& l = seg.lines.at(j); -217 for (int k = 0; k < SIZE(l.words); ++k) { -218 const word& w = l.words.at(k); -219 uint8_t val = hex_byte(w.data); -220 if (trace_contains_errors()) return; -221 write_mem_u8(addr, val); -222 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); -223 ++addr; -224 } -225 } -226 if (i == 0) End_of_program = addr; -227 } -228 EIP = 
p.segments.at(0).start; -229 trace(99, "load") << "done" << end(); -230 } -231 -232 uint8_t hex_byte(const string& s) { -233 istringstream in(s); -234 int result = 0; -235 in >> std::hex >> result; -236 if (!in || !in.eof()) { -237 raise << "token '" << s << "' is not a hex byte\n" << end(); -238 return '\0'; -239 } -240 if (result > 0xff || result < -0x8f) { -241 raise << "token '" << s << "' is not a hex byte\n" << end(); -242 return '\0'; -243 } -244 return static_cast<uint8_t>(result); -245 } -246 -247 :(scenarios parse_and_load) -248 :(scenario number_too_large) -249 % Hide_errors = true; -250 == 0x1 -251 05 cab -252 +error: token 'cab' is not a hex byte -253 -254 :(scenario invalid_hex) -255 % Hide_errors = true; -256 == 0x1 -257 05 cx -258 +error: token 'cx' is not a hex byte +181 trace(99, "parse") << "done" << end(); +182 } +183 +184 void parse_word(const string& data, word& out) { +185 out.original = data; +186 istringstream win(data); +187 if (getline(win, out.data, '/')) { +188 string m; +189 while (getline(win, m, '/')) +190 out.metadata.push_back(m); +191 } +192 } +193 +194 string to_string(const word& w) { +195 ostringstream out; +196 out << w.data; +197 for (int i = 0; i < SIZE(w.metadata); ++i) +198 out << " /" << w.metadata.at(i); +199 return out.str(); +200 } +201 +202 //:: transform +203 +204 :(before "End Types") +205 typedef void (*transform_fn)(program&); +206 :(before "End Globals") +207 vector<transform_fn> Transform; +208 +209 void transform(program& p) { +210 trace(99, "transform") << "begin" << end(); +211 for (int t = 0; t < SIZE(Transform); ++t) +212 (*Transform.at(t))(p); +213 trace(99, "transform") << "done" << end(); +214 } +215 +216 //:: load +217 +218 void load(const program& p) { +219 trace(99, "load") << "begin" << end(); +220 if (p.segments.empty()) { +221 raise << "no code to run\n" << end(); +222 return; +223 } +224 for (int i = 0; i < SIZE(p.segments); ++i) { +225 const segment& seg = p.segments.at(i); +226 uint32_t addr = 
seg.start; +227 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); +228 for (int j = 0; j < SIZE(seg.lines); ++j) { +229 const line& l = seg.lines.at(j); +230 for (int k = 0; k < SIZE(l.words); ++k) { +231 const word& w = l.words.at(k); +232 uint8_t val = hex_byte(w.data); +233 if (trace_contains_errors()) return; +234 write_mem_u8(addr, val); +235 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); +236 ++addr; +237 } +238 } +239 if (i == 0) End_of_program = addr; +240 } +241 EIP = p.segments.at(0).start; +242 trace(99, "load") << "done" << end(); +243 } +244 +245 uint8_t hex_byte(const string& s) { +246 istringstream in(s); +247 int result = 0; +248 in >> std::hex >> result; +249 if (!in || !in.eof()) { +250 raise << "token '" << s << "' is not a hex byte\n" << end(); +251 return '\0'; +252 } +253 if (result > 0xff || result < -0x8f) { +254 raise << "token '" << s << "' is not a hex byte\n" << end(); +255 return '\0'; +256 } +257 return static_cast<uint8_t>(result); +258 } 259 -260 :(scenario negative_number) -261 == 0x1 -262 05 -12 -263 $error: 0 -264 -265 :(scenario negative_number_too_small) -266 % Hide_errors = true; -267 == 0x1 -268 05 -12345 -269 +error: token '-12345' is not a hex byte -270 -271 :(scenario hex_prefix) -272 == 0x1 -273 0x05 -0x12 -274 $error: 0 -275 -276 //: helper for tests -277 :(code) -278 void parse_and_load(const string& text_bytes) { -279 program p; -280 istringstream in(text_bytes); -281 parse(in, p); -282 if (trace_contains_errors()) return; // if any stage raises errors, stop immediately -283 load(p); -284 } -285 -286 //:: run -287 -288 :(before "End Initialize Op Names(name)") -289 put(name, "05", "add imm32 to R0 (EAX)"); -290 -291 //: our first opcode -292 :(before "End Single-Byte Opcodes") -293 case 0x05: { // add imm32 to EAX -294 int32_t arg2 = next32(); -295 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); 
-296 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); -297 break; -298 } -299 -300 :(code) -301 // read a 32-bit int in little-endian order from the instruction stream -302 int32_t next32() { -303 int32_t result = next(); -304 result |= (next()<<8); -305 result |= (next()<<16); -306 result |= (next()<<24); -307 return result; -308 } -309 -310 :(code) -311 int32_t parse_int(const string& s) { -312 if (s.empty()) return 0; -313 istringstream in(s); -314 in >> std::hex; -315 if (s.at(0) == '-') { -316 int32_t result = 0; -317 in >> result; -318 if (!in || !in.eof()) { -319 raise << "not a number: " << s << '\n' << end(); -320 return 0; -321 } -322 return result; -323 } -324 uint32_t uresult = 0; -325 in >> uresult; -326 if (!in || !in.eof()) { -327 raise << "not a number: " << s << '\n' << end(); -328 return 0; -329 } -330 return static_cast<int32_t>(uresult); -331 } -332 :(before "End Unit Tests") -333 void test_parse_int() { -334 CHECK_EQ(0, parse_int("0")); -335 CHECK_EQ(0, parse_int("0x0")); -336 CHECK_EQ(0, parse_int("0x0")); -337 CHECK_EQ(16, parse_int("10")); // hex always -338 CHECK_EQ(-1, parse_int("-1")); -339 CHECK_EQ(-1, parse_int("0xffffffff")); -340 } +260 :(scenarios parse_and_load) +261 :(scenario number_too_large) +262 % Hide_errors = true; +263 == 0x1 +264 05 cab +265 +error: token 'cab' is not a hex byte +266 +267 :(scenario invalid_hex) +268 % Hide_errors = true; +269 == 0x1 +270 05 cx +271 +error: token 'cx' is not a hex byte +272 +273 :(scenario negative_number) +274 == 0x1 +275 05 -12 +276 $error: 0 +277 +278 :(scenario negative_number_too_small) +279 % Hide_errors = true; +280 == 0x1 +281 05 -12345 +282 +error: token '-12345' is not a hex byte +283 +284 :(scenario hex_prefix) +285 == 0x1 +286 0x05 -0x12 +287 $error: 0 +288 +289 //: helper for tests +290 :(code) +291 void parse_and_load(const string& text_bytes) { +292 program p; +293 istringstream in(text_bytes); +294 parse(in, p); +295 if (trace_contains_errors()) return; // if any stage raises 
errors, stop immediately +296 load(p); +297 } +298 +299 //:: run +300 +301 :(before "End Initialize Op Names(name)") +302 put(name, "05", "add imm32 to R0 (EAX)"); +303 +304 //: our first opcode +305 :(before "End Single-Byte Opcodes") +306 case 0x05: { // add imm32 to EAX +307 int32_t arg2 = next32(); +308 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); +309 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); +310 break; +311 } +312 +313 :(code) +314 // read a 32-bit int in little-endian order from the instruction stream +315 int32_t next32() { +316 int32_t result = next(); +317 result |= (next()<<8); +318 result |= (next()<<16); +319 result |= (next()<<24); +320 return result; +321 } +322 +323 :(code) +324 int32_t parse_int(const string& s) { +325 if (s.empty()) return 0; +326 istringstream in(s); +327 in >> std::hex; +328 if (s.at(0) == '-') { +329 int32_t result = 0; +330 in >> result; +331 if (!in || !in.eof()) { +332 raise << "not a number: " << s << '\n' << end(); +333 return 0; +334 } +335 return result; +336 } +337 uint32_t uresult = 0; +338 in >> uresult; +339 if (!in || !in.eof()) { +340 raise << "not a number: " << s << '\n' << end(); +341 return 0; +342 } +343 return static_cast<int32_t>(uresult); +344 } +345 :(before "End Unit Tests") +346 void test_parse_int() { +347 CHECK_EQ(0, parse_int("0")); +348 CHECK_EQ(0, parse_int("0x0")); +349 CHECK_EQ(0, parse_int("0x0")); +350 CHECK_EQ(16, parse_int("10")); // hex always +351 CHECK_EQ(-1, parse_int("-1")); +352 CHECK_EQ(-1, parse_int("0xffffffff")); +353 } diff --git a/html/subx/013direct_addressing.cc.html b/html/subx/013direct_addressing.cc.html index 0c945efa..d4e8482c 100644 --- a/html/subx/013direct_addressing.cc.html +++ b/html/subx/013direct_addressing.cc.html @@ -83,7 +83,7 @@ if ('onhashchange' in window) { 18 case 0x01: { // add r32 to r/m32 19 uint8_t modrm = next(); 20 uint8_t arg2 = (modrm>>3)&0x7; - 21 trace(90, "run") << "add " << rname(arg2) << " to r/m32" << end(); 
+ 21 trace(90, "run") << "add " << rname(arg2) << " to r/m32" << end(); 22 int32_t* arg1 = effective_address(modrm); 23 BINARY_ARITHMETIC_OP(+, *arg1, Reg[arg2].i); 24 break; @@ -97,551 +97,563 @@ if ('onhashchange' in window) { 32 uint8_t mod = (modrm>>6); 33 // ignore middle 3 'reg opcode' bits 34 uint8_t rm = modrm & 0x7; - 35 uint32_t addr = 0; - 36 switch (mod) { - 37 case 3: - 38 // mod 3 is just register direct addressing - 39 trace(90, "run") << "r/m32 is " << rname(rm) << end(); - 40 return &Reg[rm].i; - 41 // End Mod Special-cases(addr) - 42 default: - 43 cerr << "unrecognized mod bits: " << NUM(mod) << '\n'; - 44 exit(1); - 45 } - 46 //: other mods are indirect, and they'll set addr appropriately - 47 return mem_addr_i32(addr); - 48 } - 49 - 50 string rname(uint8_t r) { - 51 switch (r) { - 52 case 0: return "EAX"; - 53 case 1: return "ECX"; - 54 case 2: return "EDX"; - 55 case 3: return "EBX"; - 56 case 4: return "ESP"; - 57 case 5: return "EBP"; - 58 case 6: return "ESI"; - 59 case 7: return "EDI"; - 60 default: raise << "invalid register " << r << '\n' << end(); return ""; - 61 } - 62 } - 63 - 64 //:: subtract - 65 - 66 :(before "End Initialize Op Names(name)") - 67 put(name, "29", "subtract r32 from rm32"); - 68 - 69 :(scenario subtract_r32_from_r32) - 70 % Reg[EAX].i = 10; - 71 % Reg[EBX].i = 1; - 72 == 0x1 - 73 # op ModR/M SIB displacement immediate - 74 29 d8 # subtract EBX from EAX - 75 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) - 76 +run: subtract EBX from r/m32 - 77 +run: r/m32 is EAX - 78 +run: storing 0x00000009 - 79 - 80 :(before "End Single-Byte Opcodes") - 81 case 0x29: { // subtract r32 from r/m32 - 82 uint8_t modrm = next(); - 83 uint8_t arg2 = (modrm>>3)&0x7; - 84 trace(90, "run") << "subtract " << rname(arg2) << " from r/m32" << end(); - 85 int32_t* arg1 = effective_address(modrm); - 86 BINARY_ARITHMETIC_OP(-, *arg1, Reg[arg2].i); - 87 break; - 88 } - 89 - 90 //:: multiply + 35 if (mod == 3) { + 36 // mod 3 is 
just register direct addressing + 37 trace(90, "run") << "r/m32 is " << rname(rm) << end(); + 38 return &Reg[rm].i; + 39 } + 40 return mem_addr_i32(effective_address_number(modrm)); + 41 } + 42 + 43 uint32_t effective_address_number(uint8_t modrm) { + 44 uint8_t mod = (modrm>>6); + 45 // ignore middle 3 'reg opcode' bits + 46 uint8_t rm = modrm & 0x7; + 47 uint32_t addr = 0; + 48 switch (mod) { + 49 case 3: + 50 // mod 3 is just register direct addressing + 51 raise << "unexpected direct addressing mode\n" << end(); + 52 return 0; + 53 // End Mod Special-cases(addr) + 54 default: + 55 cerr << "unrecognized mod bits: " << NUM(mod) << '\n'; + 56 exit(1); + 57 } + 58 //: other mods are indirect, and they'll set addr appropriately + 59 return addr; + 60 } + 61 + 62 string rname(uint8_t r) { + 63 switch (r) { + 64 case 0: return "EAX"; + 65 case 1: return "ECX"; + 66 case 2: return "EDX"; + 67 case 3: return "EBX"; + 68 case 4: return "ESP"; + 69 case 5: return "EBP"; + 70 case 6: return "ESI"; + 71 case 7: return "EDI"; + 72 default: raise << "invalid register " << r << '\n' << end(); return ""; + 73 } + 74 } + 75 + 76 //:: subtract + 77 + 78 :(before "End Initialize Op Names(name)") + 79 put(name, "29", "subtract r32 from rm32"); + 80 + 81 :(scenario subtract_r32_from_r32) + 82 % Reg[EAX].i = 10; + 83 % Reg[EBX].i = 1; + 84 == 0x1 + 85 # op ModR/M SIB displacement immediate + 86 29 d8 # subtract EBX from EAX + 87 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) + 88 +run: subtract EBX from r/m32 + 89 +run: r/m32 is EAX + 90 +run: storing 0x00000009 91 - 92 :(before "End Initialize Op Names(name)") - 93 put(name, "f7", "test/negate/mul/div rm32 (with EAX if necessary) depending on subop"); - 94 - 95 :(scenario multiply_eax_by_r32) - 96 % Reg[EAX].i = 4; - 97 % Reg[ECX].i = 3; - 98 == 0x1 - 99 # op ModR/M SIB displacement immediate -100 f7 e1 # multiply EAX by ECX -101 # ModR/M in binary: 11 (direct mode) 100 (subop mul) 001 (src ECX) -102 +run: operate 
on r/m32 -103 +run: r/m32 is ECX -104 +run: subop: multiply EAX by r/m32 -105 +run: storing 0x0000000c + 92 :(before "End Single-Byte Opcodes") + 93 case 0x29: { // subtract r32 from r/m32 + 94 uint8_t modrm = next(); + 95 uint8_t arg2 = (modrm>>3)&0x7; + 96 trace(90, "run") << "subtract " << rname(arg2) << " from r/m32" << end(); + 97 int32_t* arg1 = effective_address(modrm); + 98 BINARY_ARITHMETIC_OP(-, *arg1, Reg[arg2].i); + 99 break; +100 } +101 +102 //:: multiply +103 +104 :(before "End Initialize Op Names(name)") +105 put(name, "f7", "test/negate/mul/div rm32 (with EAX if necessary) depending on subop"); 106 -107 :(before "End Single-Byte Opcodes") -108 case 0xf7: { // xor r32 with r/m32 -109 uint8_t modrm = next(); -110 trace(90, "run") << "operate on r/m32" << end(); -111 int32_t* arg1 = effective_address(modrm); -112 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits -113 switch (subop) { -114 case 4: { // mul unsigned EAX by r/m32 -115 trace(90, "run") << "subop: multiply EAX by r/m32" << end(); -116 uint64_t result = Reg[EAX].u * static_cast<uint32_t>(*arg1); -117 Reg[EAX].u = result & 0xffffffff; -118 Reg[EDX].u = result >> 32; -119 OF = (Reg[EDX].u != 0); -120 trace(90, "run") << "storing 0x" << HEXWORD << Reg[EAX].u << end(); -121 break; -122 } -123 // End Op f7 Subops -124 default: -125 cerr << "unrecognized sub-opcode after f7: " << NUM(subop) << '\n'; -126 exit(1); -127 } -128 break; -129 } -130 -131 //: -132 -133 :(before "End Initialize Op Names(name)") -134 put(name_0f, "af", "multiply rm32 into r32"); -135 -136 :(scenario multiply_r32_into_r32) -137 % Reg[EAX].i = 4; -138 % Reg[EBX].i = 2; -139 == 0x1 -140 # op ModR/M SIB displacement immediate -141 0f af d8 # subtract EBX into EAX -142 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -143 +run: multiply r/m32 into EBX -144 +run: r/m32 is EAX -145 +run: storing 0x00000008 -146 -147 :(before "End Two-Byte Opcodes Starting With 0f") -148 case 0xaf: { // multiply r32 
into r/m32 -149 uint8_t modrm = next(); -150 uint8_t arg2 = (modrm>>3)&0x7; -151 trace(90, "run") << "multiply r/m32 into " << rname(arg2) << end(); -152 int32_t* arg1 = effective_address(modrm); -153 BINARY_ARITHMETIC_OP(*, Reg[arg2].i, *arg1); -154 break; -155 } -156 -157 //:: and +107 :(scenario multiply_eax_by_r32) +108 % Reg[EAX].i = 4; +109 % Reg[ECX].i = 3; +110 == 0x1 +111 # op ModR/M SIB displacement immediate +112 f7 e1 # multiply EAX by ECX +113 # ModR/M in binary: 11 (direct mode) 100 (subop mul) 001 (src ECX) +114 +run: operate on r/m32 +115 +run: r/m32 is ECX +116 +run: subop: multiply EAX by r/m32 +117 +run: storing 0x0000000c +118 +119 :(before "End Single-Byte Opcodes") +120 case 0xf7: { // xor r32 with r/m32 +121 uint8_t modrm = next(); +122 trace(90, "run") << "operate on r/m32" << end(); +123 int32_t* arg1 = effective_address(modrm); +124 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits +125 switch (subop) { +126 case 4: { // mul unsigned EAX by r/m32 +127 trace(90, "run") << "subop: multiply EAX by r/m32" << end(); +128 uint64_t result = Reg[EAX].u * static_cast<uint32_t>(*arg1); +129 Reg[EAX].u = result & 0xffffffff; +130 Reg[EDX].u = result >> 32; +131 OF = (Reg[EDX].u != 0); +132 trace(90, "run") << "storing 0x" << HEXWORD << Reg[EAX].u << end(); +133 break; +134 } +135 // End Op f7 Subops +136 default: +137 cerr << "unrecognized sub-opcode after f7: " << NUM(subop) << '\n'; +138 exit(1); +139 } +140 break; +141 } +142 +143 //: +144 +145 :(before "End Initialize Op Names(name)") +146 put(name_0f, "af", "multiply rm32 into r32"); +147 +148 :(scenario multiply_r32_into_r32) +149 % Reg[EAX].i = 4; +150 % Reg[EBX].i = 2; +151 == 0x1 +152 # op ModR/M SIB displacement immediate +153 0f af d8 # subtract EBX into EAX +154 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) +155 +run: multiply r/m32 into EBX +156 +run: r/m32 is EAX +157 +run: storing 0x00000008 158 -159 :(before "End Initialize Op Names(name)") -160 
put(name, "21", "rm32 = bitwise AND of r32 with rm32"); -161 -162 :(scenario and_r32_with_r32) -163 % Reg[EAX].i = 0x0a0b0c0d; -164 % Reg[EBX].i = 0x000000ff; -165 == 0x1 -166 # op ModR/M SIB displacement immediate -167 21 d8 # and EBX with destination EAX -168 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -169 +run: and EBX with r/m32 -170 +run: r/m32 is EAX -171 +run: storing 0x0000000d -172 -173 :(before "End Single-Byte Opcodes") -174 case 0x21: { // and r32 with r/m32 -175 uint8_t modrm = next(); -176 uint8_t arg2 = (modrm>>3)&0x7; -177 trace(90, "run") << "and " << rname(arg2) << " with r/m32" << end(); -178 int32_t* arg1 = effective_address(modrm); -179 BINARY_BITWISE_OP(&, *arg1, Reg[arg2].u); -180 break; -181 } -182 -183 //:: or +159 :(before "End Two-Byte Opcodes Starting With 0f") +160 case 0xaf: { // multiply r32 into r/m32 +161 uint8_t modrm = next(); +162 uint8_t arg2 = (modrm>>3)&0x7; +163 trace(90, "run") << "multiply r/m32 into " << rname(arg2) << end(); +164 int32_t* arg1 = effective_address(modrm); +165 BINARY_ARITHMETIC_OP(*, Reg[arg2].i, *arg1); +166 break; +167 } +168 +169 //:: and +170 +171 :(before "End Initialize Op Names(name)") +172 put(name, "21", "rm32 = bitwise AND of r32 with rm32"); +173 +174 :(scenario and_r32_with_r32) +175 % Reg[EAX].i = 0x0a0b0c0d; +176 % Reg[EBX].i = 0x000000ff; +177 == 0x1 +178 # op ModR/M SIB displacement immediate +179 21 d8 # and EBX with destination EAX +180 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) +181 +run: and EBX with r/m32 +182 +run: r/m32 is EAX +183 +run: storing 0x0000000d 184 -185 :(before "End Initialize Op Names(name)") -186 put(name, "09", "rm32 = bitwise OR of r32 with rm32"); -187 -188 :(scenario or_r32_with_r32) -189 % Reg[EAX].i = 0x0a0b0c0d; -190 % Reg[EBX].i = 0xa0b0c0d0; -191 == 0x1 -192 # op ModR/M SIB displacement immediate -193 09 d8 # or EBX with destination EAX -194 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -195 
+run: or EBX with r/m32 -196 +run: r/m32 is EAX -197 +run: storing 0xaabbccdd -198 -199 :(before "End Single-Byte Opcodes") -200 case 0x09: { // or r32 with r/m32 -201 uint8_t modrm = next(); -202 uint8_t arg2 = (modrm>>3)&0x7; -203 trace(90, "run") << "or " << rname(arg2) << " with r/m32" << end(); -204 int32_t* arg1 = effective_address(modrm); -205 BINARY_BITWISE_OP(|, *arg1, Reg[arg2].u); -206 break; -207 } -208 -209 //:: xor +185 :(before "End Single-Byte Opcodes") +186 case 0x21: { // and r32 with r/m32 +187 uint8_t modrm = next(); +188 uint8_t arg2 = (modrm>>3)&0x7; +189 trace(90, "run") << "and " << rname(arg2) << " with r/m32" << end(); +190 int32_t* arg1 = effective_address(modrm); +191 BINARY_BITWISE_OP(&, *arg1, Reg[arg2].u); +192 break; +193 } +194 +195 //:: or +196 +197 :(before "End Initialize Op Names(name)") +198 put(name, "09", "rm32 = bitwise OR of r32 with rm32"); +199 +200 :(scenario or_r32_with_r32) +201 % Reg[EAX].i = 0x0a0b0c0d; +202 % Reg[EBX].i = 0xa0b0c0d0; +203 == 0x1 +204 # op ModR/M SIB displacement immediate +205 09 d8 # or EBX with destination EAX +206 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) +207 +run: or EBX with r/m32 +208 +run: r/m32 is EAX +209 +run: storing 0xaabbccdd 210 -211 :(before "End Initialize Op Names(name)") -212 put(name, "31", "rm32 = bitwise XOR of r32 with rm32"); -213 -214 :(scenario xor_r32_with_r32) -215 % Reg[EAX].i = 0x0a0b0c0d; -216 % Reg[EBX].i = 0xaabbc0d0; -217 == 0x1 -218 # op ModR/M SIB displacement immediate -219 31 d8 # xor EBX with destination EAX -220 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -221 +run: xor EBX with r/m32 -222 +run: r/m32 is EAX -223 +run: storing 0xa0b0ccdd -224 -225 :(before "End Single-Byte Opcodes") -226 case 0x31: { // xor r32 with r/m32 -227 uint8_t modrm = next(); -228 uint8_t arg2 = (modrm>>3)&0x7; -229 trace(90, "run") << "xor " << rname(arg2) << " with r/m32" << end(); -230 int32_t* arg1 = effective_address(modrm); -231 
BINARY_BITWISE_OP(^, *arg1, Reg[arg2].u); -232 break; -233 } -234 -235 //:: not +211 :(before "End Single-Byte Opcodes") +212 case 0x09: { // or r32 with r/m32 +213 uint8_t modrm = next(); +214 uint8_t arg2 = (modrm>>3)&0x7; +215 trace(90, "run") << "or " << rname(arg2) << " with r/m32" << end(); +216 int32_t* arg1 = effective_address(modrm); +217 BINARY_BITWISE_OP(|, *arg1, Reg[arg2].u); +218 break; +219 } +220 +221 //:: xor +222 +223 :(before "End Initialize Op Names(name)") +224 put(name, "31", "rm32 = bitwise XOR of r32 with rm32"); +225 +226 :(scenario xor_r32_with_r32) +227 % Reg[EAX].i = 0x0a0b0c0d; +228 % Reg[EBX].i = 0xaabbc0d0; +229 == 0x1 +230 # op ModR/M SIB displacement immediate +231 31 d8 # xor EBX with destination EAX +232 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) +233 +run: xor EBX with r/m32 +234 +run: r/m32 is EAX +235 +run: storing 0xa0b0ccdd 236 -237 :(before "End Initialize Op Names(name)") -238 put(name, "f7", "bitwise complement of rm32"); -239 -240 :(scenario not_r32) -241 % Reg[EBX].i = 0x0f0f00ff; -242 == 0x1 -243 # op ModR/M SIB displacement immediate -244 f7 d3 # not EBX -245 # ModR/M in binary: 11 (direct mode) 010 (subop not) 011 (dest EBX) -246 +run: operate on r/m32 -247 +run: r/m32 is EBX -248 +run: subop: not -249 +run: storing 0xf0f0ff00 -250 -251 :(before "End Op f7 Subops") -252 case 2: { // not r/m32 -253 trace(90, "run") << "subop: not" << end(); -254 *arg1 = ~(*arg1); -255 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); -256 SF = (*arg1 >> 31); -257 ZF = (*arg1 == 0); -258 OF = false; -259 break; -260 } -261 -262 //:: compare (cmp) -263 -264 :(before "End Initialize Op Names(name)") -265 put(name, "39", "compare: set SF if rm32 < r32"); -266 -267 :(scenario compare_r32_with_r32_greater) -268 % Reg[EAX].i = 0x0a0b0c0d; -269 % Reg[EBX].i = 0x0a0b0c07; -270 == 0x1 -271 # op ModR/M SIB displacement immediate -272 39 d8 # compare EBX with EAX -273 # ModR/M in binary: 11 (direct mode) 011 
(src EBX) 000 (dest EAX) -274 +run: compare EBX with r/m32 -275 +run: r/m32 is EAX -276 +run: SF=0; ZF=0; OF=0 -277 -278 :(before "End Single-Byte Opcodes") -279 case 0x39: { // set SF if r/m32 < r32 -280 uint8_t modrm = next(); -281 uint8_t reg2 = (modrm>>3)&0x7; -282 trace(90, "run") << "compare " << rname(reg2) << " with r/m32" << end(); -283 int32_t* arg1 = effective_address(modrm); -284 int32_t arg2 = Reg[reg2].i; -285 int32_t tmp1 = *arg1 - arg2; -286 SF = (tmp1 < 0); -287 ZF = (tmp1 == 0); -288 int64_t tmp2 = *arg1 - arg2; -289 OF = (tmp1 != tmp2); -290 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); -291 break; -292 } -293 -294 :(scenario compare_r32_with_r32_lesser) -295 % Reg[EAX].i = 0x0a0b0c07; -296 % Reg[EBX].i = 0x0a0b0c0d; -297 == 0x1 -298 # op ModR/M SIB displacement immediate -299 39 d8 # compare EBX with EAX -300 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -301 +run: compare EBX with r/m32 -302 +run: r/m32 is EAX -303 +run: SF=1; ZF=0; OF=0 -304 -305 :(scenario compare_r32_with_r32_equal) -306 % Reg[EAX].i = 0x0a0b0c0d; -307 % Reg[EBX].i = 0x0a0b0c0d; -308 == 0x1 -309 # op ModR/M SIB displacement immediate -310 39 d8 # compare EBX with EAX -311 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -312 +run: compare EBX with r/m32 -313 +run: r/m32 is EAX -314 +run: SF=0; ZF=1; OF=0 -315 -316 //:: copy (mov) -317 -318 :(before "End Initialize Op Names(name)") -319 put(name, "89", "copy r32 to rm32"); -320 -321 :(scenario copy_r32_to_r32) -322 % Reg[EBX].i = 0xaf; -323 == 0x1 -324 # op ModR/M SIB displacement immediate -325 89 d8 # copy EBX to EAX -326 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -327 +run: copy EBX to r/m32 -328 +run: r/m32 is EAX -329 +run: storing 0x000000af -330 -331 :(before "End Single-Byte Opcodes") -332 case 0x89: { // copy r32 to r/m32 -333 uint8_t modrm = next(); -334 uint8_t reg2 = (modrm>>3)&0x7; -335 trace(90, "run") << "copy " << 
rname(reg2) << " to r/m32" << end(); -336 int32_t* arg1 = effective_address(modrm); -337 *arg1 = Reg[reg2].i; -338 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); -339 break; -340 } -341 -342 //:: xchg -343 -344 :(before "End Initialize Op Names(name)") -345 put(name, "87", "swap the contents of r32 and rm32"); -346 -347 :(scenario xchg_r32_with_r32) -348 % Reg[EBX].i = 0xaf; -349 % Reg[EAX].i = 0x2e; -350 == 0x1 -351 # op ModR/M SIB displacement immediate -352 87 d8 # exchange EBX with EAX -353 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) -354 +run: exchange EBX with r/m32 -355 +run: r/m32 is EAX -356 +run: storing 0x000000af in r/m32 -357 +run: storing 0x0000002e in EBX +237 :(before "End Single-Byte Opcodes") +238 case 0x31: { // xor r32 with r/m32 +239 uint8_t modrm = next(); +240 uint8_t arg2 = (modrm>>3)&0x7; +241 trace(90, "run") << "xor " << rname(arg2) << " with r/m32" << end(); +242 int32_t* arg1 = effective_address(modrm); +243 BINARY_BITWISE_OP(^, *arg1, Reg[arg2].u); +244 break; +245 } +246 +247 //:: not +248 +249 :(before "End Initialize Op Names(name)") +250 put(name, "f7", "bitwise complement of rm32"); +251 +252 :(scenario not_r32) +253 % Reg[EBX].i = 0x0f0f00ff; +254 == 0x1 +255 # op ModR/M SIB displacement immediate +256 f7 d3 # not EBX +257 # ModR/M in binary: 11 (direct mode) 010 (subop not) 011 (dest EBX) +258 +run: operate on r/m32 +259 +run: r/m32 is EBX +260 +run: subop: not +261 +run: storing 0xf0f0ff00 +262 +263 :(before "End Op f7 Subops") +264 case 2: { // not r/m32 +265 trace(90, "run") << "subop: not" << end(); +266 *arg1 = ~(*arg1); +267 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); +268 SF = (*arg1 >> 31); +269 ZF = (*arg1 == 0); +270 OF = false; +271 break; +272 } +273 +274 //:: compare (cmp) +275 +276 :(before "End Initialize Op Names(name)") +277 put(name, "39", "compare: set SF if rm32 < r32"); +278 +279 :(scenario compare_r32_with_r32_greater) +280 % Reg[EAX].i = 0x0a0b0c0d; 
+281 % Reg[EBX].i = 0x0a0b0c07; +282 == 0x1 +283 # op ModR/M SIB displacement immediate +284 39 d8 # compare EBX with EAX +285 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) +286 +run: compare EBX with r/m32 +287 +run: r/m32 is EAX +288 +run: SF=0; ZF=0; OF=0 +289 +290 :(before "End Single-Byte Opcodes") +291 case 0x39: { // set SF if r/m32 < r32 +292 uint8_t modrm = next(); +293 uint8_t reg2 = (modrm>>3)&0x7; +294 trace(90, "run") << "compare " << rname(reg2) << " with r/m32" << end(); +295 int32_t* arg1 = effective_address(modrm); +296 int32_t arg2 = Reg[reg2].i; +297 int32_t tmp1 = *arg1 - arg2; +298 SF = (tmp1 < 0); +299 ZF = (tmp1 == 0); +300 int64_t tmp2 = *arg1 - arg2; +301 OF = (tmp1 != tmp2); +302 trace(90, "run") << "SF=" << SF << "; ZF=" << ZF << "; OF=" << OF << end(); +303 break; +304 } +305 +306 :(scenario compare_r32_with_r32_lesser) +307 % Reg[EAX].i = 0x0a0b0c07; +308 % Reg[EBX].i = 0x0a0b0c0d; +309 == 0x1 +310 # op ModR/M SIB displacement immediate +311 39 d8 # compare EBX with EAX +312 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) +313 +run: compare EBX with r/m32 +314 +run: r/m32 is EAX +315 +run: SF=1; ZF=0; OF=0 +316 +317 :(scenario compare_r32_with_r32_equal) +318 % Reg[EAX].i = 0x0a0b0c0d; +319 % Reg[EBX].i = 0x0a0b0c0d; +320 == 0x1 +321 # op ModR/M SIB displacement immediate +322 39 d8 # compare EBX with EAX +323 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) +324 +run: compare EBX with r/m32 +325 +run: r/m32 is EAX +326 +run: SF=0; ZF=1; OF=0 +327 +328 //:: copy (mov) +329 +330 :(before "End Initialize Op Names(name)") +331 put(name, "89", "copy r32 to rm32"); +332 +333 :(scenario copy_r32_to_r32) +334 % Reg[EBX].i = 0xaf; +335 == 0x1 +336 # op ModR/M SIB displacement immediate +337 89 d8 # copy EBX to EAX +338 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) +339 +run: copy EBX to r/m32 +340 +run: r/m32 is EAX +341 +run: storing 0x000000af +342 +343 :(before "End 
Single-Byte Opcodes") +344 case 0x89: { // copy r32 to r/m32 +345 uint8_t modrm = next(); +346 uint8_t reg2 = (modrm>>3)&0x7; +347 trace(90, "run") << "copy " << rname(reg2) << " to r/m32" << end(); +348 int32_t* arg1 = effective_address(modrm); +349 *arg1 = Reg[reg2].i; +350 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); +351 break; +352 } +353 +354 //:: xchg +355 +356 :(before "End Initialize Op Names(name)") +357 put(name, "87", "swap the contents of r32 and rm32"); 358 -359 :(before "End Single-Byte Opcodes") -360 case 0x87: { // exchange r32 with r/m32 -361 uint8_t modrm = next(); -362 uint8_t reg2 = (modrm>>3)&0x7; -363 trace(90, "run") << "exchange " << rname(reg2) << " with r/m32" << end(); -364 int32_t* arg1 = effective_address(modrm); -365 int32_t tmp = *arg1; -366 *arg1 = Reg[reg2].i; -367 Reg[reg2].i = tmp; -368 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << " in r/m32" << end(); -369 trace(90, "run") << "storing 0x" << HEXWORD << Reg[reg2].i << " in " << rname(reg2) << end(); -370 break; -371 } -372 -373 //:: increment -374 -375 :(before "End Initialize Op Names(name)") -376 put(name, "40", "increment R0 (EAX)"); -377 put(name, "41", "increment R1 (ECX)"); -378 put(name, "42", "increment R2 (EDX)"); -379 put(name, "43", "increment R3 (EBX)"); -380 put(name, "44", "increment R4 (ESP)"); -381 put(name, "45", "increment R5 (EBP)"); -382 put(name, "46", "increment R6 (ESI)"); -383 put(name, "47", "increment R7 (EDI)"); +359 :(scenario xchg_r32_with_r32) +360 % Reg[EBX].i = 0xaf; +361 % Reg[EAX].i = 0x2e; +362 == 0x1 +363 # op ModR/M SIB displacement immediate +364 87 d8 # exchange EBX with EAX +365 # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) +366 +run: exchange EBX with r/m32 +367 +run: r/m32 is EAX +368 +run: storing 0x000000af in r/m32 +369 +run: storing 0x0000002e in EBX +370 +371 :(before "End Single-Byte Opcodes") +372 case 0x87: { // exchange r32 with r/m32 +373 uint8_t modrm = next(); +374 uint8_t reg2 
= (modrm>>3)&0x7; +375 trace(90, "run") << "exchange " << rname(reg2) << " with r/m32" << end(); +376 int32_t* arg1 = effective_address(modrm); +377 int32_t tmp = *arg1; +378 *arg1 = Reg[reg2].i; +379 Reg[reg2].i = tmp; +380 trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << " in r/m32" << end(); +381 trace(90, "run") << "storing 0x" << HEXWORD << Reg[reg2].i << " in " << rname(reg2) << end(); +382 break; +383 } 384 -385 :(scenario increment_r32) -386 % Reg[ECX].u = 0x1f; -387 == 0x1 # code segment -388 # op ModR/M SIB displacement immediate -389 41 # increment ECX -390 +run: increment ECX -391 +run: storing value 0x00000020 -392 -393 :(before "End Single-Byte Opcodes") -394 case 0x40: -395 case 0x41: -396 case 0x42: -397 case 0x43: -398 case 0x44: -399 case 0x45: -400 case 0x46: -401 case 0x47: { // increment r32 -402 uint8_t reg = op & 0x7; -403 trace(90, "run") << "increment " << rname(reg) << end(); -404 ++Reg[reg].u; -405 trace(90, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); -406 break; -407 } -408 -409 :(before "End Initialize Op Names(name)") -410 put(name, "ff", "inc/dec/jump/push/call rm32 based on subop"); -411 -412 :(scenario increment_rm32) -413 % Reg[EAX].u = 0x20; -414 == 0x1 # code segment -415 # op ModR/M SIB displacement immediate -416 ff c0 # increment EAX -417 # ModR/M in binary: 11 (direct mode) 000 (subop inc) 000 (EAX) -418 +run: increment r/m32 -419 +run: r/m32 is EAX -420 +run: storing value 0x00000021 -421 -422 :(before "End Single-Byte Opcodes") -423 case 0xff: { -424 uint8_t modrm = next(); -425 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits -426 switch (subop) { -427 case 0: { // increment r/m32 -428 trace(90, "run") << "increment r/m32" << end(); -429 int32_t* arg = effective_address(modrm); -430 ++*arg; -431 trace(90, "run") << "storing value 0x" << HEXWORD << *arg << end(); -432 break; -433 } -434 // End Op ff Subops -435 } -436 break; -437 } -438 -439 //:: decrement -440 -441 :(before "End 
Initialize Op Names(name)") -442 put(name, "48", "decrement R0 (EAX)"); -443 put(name, "49", "decrement R1 (ECX)"); -444 put(name, "4a", "decrement R2 (EDX)"); -445 put(name, "4b", "decrement R3 (EBX)"); -446 put(name, "4c", "decrement R4 (ESP)"); -447 put(name, "4d", "decrement R5 (EBP)"); -448 put(name, "4e", "decrement R6 (ESI)"); -449 put(name, "4f", "decrement R7 (EDI)"); +385 //:: increment +386 +387 :(before "End Initialize Op Names(name)") +388 put(name, "40", "increment R0 (EAX)"); +389 put(name, "41", "increment R1 (ECX)"); +390 put(name, "42", "increment R2 (EDX)"); +391 put(name, "43", "increment R3 (EBX)"); +392 put(name, "44", "increment R4 (ESP)"); +393 put(name, "45", "increment R5 (EBP)"); +394 put(name, "46", "increment R6 (ESI)"); +395 put(name, "47", "increment R7 (EDI)"); +396 +397 :(scenario increment_r32) +398 % Reg[ECX].u = 0x1f; +399 == 0x1 # code segment +400 # op ModR/M SIB displacement immediate +401 41 # increment ECX +402 +run: increment ECX +403 +run: storing value 0x00000020 +404 +405 :(before "End Single-Byte Opcodes") +406 case 0x40: +407 case 0x41: +408 case 0x42: +409 case 0x43: +410 case 0x44: +411 case 0x45: +412 case 0x46: +413 case 0x47: { // increment r32 +414 uint8_t reg = op & 0x7; +415 trace(90, "run") << "increment " << rname(reg) << end(); +416 ++Reg[reg].u; +417 trace(90, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); +418 break; +419 } +420 +421 :(before "End Initialize Op Names(name)") +422 put(name, "ff", "inc/dec/jump/push/call rm32 based on subop"); +423 +424 :(scenario increment_rm32) +425 % Reg[EAX].u = 0x20; +426 == 0x1 # code segment +427 # op ModR/M SIB displacement immediate +428 ff c0 # increment EAX +429 # ModR/M in binary: 11 (direct mode) 000 (subop inc) 000 (EAX) +430 +run: increment r/m32 +431 +run: r/m32 is EAX +432 +run: storing value 0x00000021 +433 +434 :(before "End Single-Byte Opcodes") +435 case 0xff: { +436 uint8_t modrm = next(); +437 uint8_t subop = (modrm>>3)&0x7; // middle 
3 'reg opcode' bits +438 switch (subop) { +439 case 0: { // increment r/m32 +440 trace(90, "run") << "increment r/m32" << end(); +441 int32_t* arg = effective_address(modrm); +442 ++*arg; +443 trace(90, "run") << "storing value 0x" << HEXWORD << *arg << end(); +444 break; +445 } +446 // End Op ff Subops +447 } +448 break; +449 } 450 -451 :(scenario decrement_r32) -452 % Reg[ECX].u = 0x1f; -453 == 0x1 # code segment -454 # op ModR/M SIB displacement immediate -455 49 # decrement ECX -456 +run: decrement ECX -457 +run: storing value 0x0000001e -458 -459 :(before "End Single-Byte Opcodes") -460 case 0x48: -461 case 0x49: -462 case 0x4a: -463 case 0x4b: -464 case 0x4c: -465 case 0x4d: -466 case 0x4e: -467 case 0x4f: { // decrement r32 -468 uint8_t reg = op & 0x7; -469 trace(90, "run") << "decrement " << rname(reg) << end(); -470 --Reg[reg].u; -471 trace(90, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); -472 break; -473 } -474 -475 :(scenario decrement_rm32) -476 % Reg[EAX].u = 0x20; -477 == 0x1 # code segment -478 # op ModR/M SIB displacement immediate -479 ff c8 # decrement EAX -480 # ModR/M in binary: 11 (direct mode) 001 (subop inc) 000 (EAX) -481 +run: decrement r/m32 -482 +run: r/m32 is EAX -483 +run: storing value 0x0000001f -484 -485 :(before "End Op ff Subops") -486 case 1: { // decrement r/m32 -487 trace(90, "run") << "decrement r/m32" << end(); -488 int32_t* arg = effective_address(modrm); -489 --*arg; -490 trace(90, "run") << "storing value 0x" << HEXWORD << *arg << end(); -491 break; -492 } -493 -494 //:: push -495 -496 :(before "End Initialize Op Names(name)") -497 put(name, "50", "push R0 (EAX) to stack"); -498 put(name, "51", "push R1 (ECX) to stack"); -499 put(name, "52", "push R2 (EDX) to stack"); -500 put(name, "53", "push R3 (EBX) to stack"); -501 put(name, "54", "push R4 (ESP) to stack"); -502 put(name, "55", "push R5 (EBP) to stack"); -503 put(name, "56", "push R6 (ESI) to stack"); -504 put(name, "57", "push R7 (EDI) to stack"); 
+451 //:: decrement +452 +453 :(before "End Initialize Op Names(name)") +454 put(name, "48", "decrement R0 (EAX)"); +455 put(name, "49", "decrement R1 (ECX)"); +456 put(name, "4a", "decrement R2 (EDX)"); +457 put(name, "4b", "decrement R3 (EBX)"); +458 put(name, "4c", "decrement R4 (ESP)"); +459 put(name, "4d", "decrement R5 (EBP)"); +460 put(name, "4e", "decrement R6 (ESI)"); +461 put(name, "4f", "decrement R7 (EDI)"); +462 +463 :(scenario decrement_r32) +464 % Reg[ECX].u = 0x1f; +465 == 0x1 # code segment +466 # op ModR/M SIB displacement immediate +467 49 # decrement ECX +468 +run: decrement ECX +469 +run: storing value 0x0000001e +470 +471 :(before "End Single-Byte Opcodes") +472 case 0x48: +473 case 0x49: +474 case 0x4a: +475 case 0x4b: +476 case 0x4c: +477 case 0x4d: +478 case 0x4e: +479 case 0x4f: { // decrement r32 +480 uint8_t reg = op & 0x7; +481 trace(90, "run") << "decrement " << rname(reg) << end(); +482 --Reg[reg].u; +483 trace(90, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); +484 break; +485 } +486 +487 :(scenario decrement_rm32) +488 % Reg[EAX].u = 0x20; +489 == 0x1 # code segment +490 # op ModR/M SIB displacement immediate +491 ff c8 # decrement EAX +492 # ModR/M in binary: 11 (direct mode) 001 (subop inc) 000 (EAX) +493 +run: decrement r/m32 +494 +run: r/m32 is EAX +495 +run: storing value 0x0000001f +496 +497 :(before "End Op ff Subops") +498 case 1: { // decrement r/m32 +499 trace(90, "run") << "decrement r/m32" << end(); +500 int32_t* arg = effective_address(modrm); +501 --*arg; +502 trace(90, "run") << "storing value 0x" << HEXWORD << *arg << end(); +503 break; +504 } 505 -506 :(scenario push_r32) -507 % Reg[ESP].u = 0x64; -508 % Reg[EBX].i = 0x0000000a; -509 == 0x1 -510 # op ModR/M SIB displacement immediate -511 53 # push EBX to stack -512 +run: push EBX -513 +run: decrementing ESP to 0x00000060 -514 +run: pushing value 0x0000000a -515 -516 :(before "End Single-Byte Opcodes") -517 case 0x50: -518 case 0x51: -519 case 0x52: 
-520 case 0x53: -521 case 0x54: -522 case 0x55: -523 case 0x56: -524 case 0x57: { // push r32 to stack -525 uint8_t reg = op & 0x7; -526 trace(90, "run") << "push " << rname(reg) << end(); -527 //? cerr << "push: " << NUM(reg) << ": " << Reg[reg].u << " => " << Reg[ESP].u << '\n'; -528 push(Reg[reg].u); -529 break; -530 } -531 -532 //:: pop -533 -534 :(before "End Initialize Op Names(name)") -535 put(name, "58", "pop top of stack to R0 (EAX)"); -536 put(name, "59", "pop top of stack to R1 (ECX)"); -537 put(name, "5a", "pop top of stack to R2 (EDX)"); -538 put(name, "5b", "pop top of stack to R3 (EBX)"); -539 put(name, "5c", "pop top of stack to R4 (ESP)"); -540 put(name, "5d", "pop top of stack to R5 (EBP)"); -541 put(name, "5e", "pop top of stack to R6 (ESI)"); -542 put(name, "5f", "pop top of stack to R7 (EDI)"); +506 //:: push +507 +508 :(before "End Initialize Op Names(name)") +509 put(name, "50", "push R0 (EAX) to stack"); +510 put(name, "51", "push R1 (ECX) to stack"); +511 put(name, "52", "push R2 (EDX) to stack"); +512 put(name, "53", "push R3 (EBX) to stack"); +513 put(name, "54", "push R4 (ESP) to stack"); +514 put(name, "55", "push R5 (EBP) to stack"); +515 put(name, "56", "push R6 (ESI) to stack"); +516 put(name, "57", "push R7 (EDI) to stack"); +517 +518 :(scenario push_r32) +519 % Reg[ESP].u = 0x64; +520 % Reg[EBX].i = 0x0000000a; +521 == 0x1 +522 # op ModR/M SIB displacement immediate +523 53 # push EBX to stack +524 +run: push EBX +525 +run: decrementing ESP to 0x00000060 +526 +run: pushing value 0x0000000a +527 +528 :(before "End Single-Byte Opcodes") +529 case 0x50: +530 case 0x51: +531 case 0x52: +532 case 0x53: +533 case 0x54: +534 case 0x55: +535 case 0x56: +536 case 0x57: { // push r32 to stack +537 uint8_t reg = op & 0x7; +538 trace(90, "run") << "push " << rname(reg) << end(); +539 //? 
cerr << "push: " << NUM(reg) << ": " << Reg[reg].u << " => " << Reg[ESP].u << '\n'; +540 push(Reg[reg].u); +541 break; +542 } 543 -544 :(scenario pop_r32) -545 % Reg[ESP].u = 0x60; -546 % write_mem_i32(0x60, 0x0000000a); -547 == 0x1 # code segment -548 # op ModR/M SIB displacement immediate -549 5b # pop stack to EBX -550 == 0x60 # data segment -551 0a 00 00 00 # 0x0a -552 +run: pop into EBX -553 +run: popping value 0x0000000a -554 +run: incrementing ESP to 0x00000064 +544 //:: pop +545 +546 :(before "End Initialize Op Names(name)") +547 put(name, "58", "pop top of stack to R0 (EAX)"); +548 put(name, "59", "pop top of stack to R1 (ECX)"); +549 put(name, "5a", "pop top of stack to R2 (EDX)"); +550 put(name, "5b", "pop top of stack to R3 (EBX)"); +551 put(name, "5c", "pop top of stack to R4 (ESP)"); +552 put(name, "5d", "pop top of stack to R5 (EBP)"); +553 put(name, "5e", "pop top of stack to R6 (ESI)"); +554 put(name, "5f", "pop top of stack to R7 (EDI)"); 555 -556 :(before "End Single-Byte Opcodes") -557 case 0x58: -558 case 0x59: -559 case 0x5a: -560 case 0x5b: -561 case 0x5c: -562 case 0x5d: -563 case 0x5e: -564 case 0x5f: { // pop stack into r32 -565 uint8_t reg = op & 0x7; -566 trace(90, "run") << "pop into " << rname(reg) << end(); -567 //? cerr << "pop from " << Reg[ESP].u << '\n'; -568 Reg[reg].u = pop(); -569 //? 
cerr << "=> " << NUM(reg) << ": " << Reg[reg].u << '\n'; -570 break; -571 } -572 :(code) -573 uint32_t pop() { -574 uint32_t result = read_mem_u32(Reg[ESP].u); -575 trace(90, "run") << "popping value 0x" << HEXWORD << result << end(); -576 Reg[ESP].u += 4; -577 trace(90, "run") << "incrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); -578 return result; -579 } +556 :(scenario pop_r32) +557 % Reg[ESP].u = 0x60; +558 % write_mem_i32(0x60, 0x0000000a); +559 == 0x1 # code segment +560 # op ModR/M SIB displacement immediate +561 5b # pop stack to EBX +562 == 0x60 # data segment +563 0a 00 00 00 # 0x0a +564 +run: pop into EBX +565 +run: popping value 0x0000000a +566 +run: incrementing ESP to 0x00000064 +567 +568 :(before "End Single-Byte Opcodes") +569 case 0x58: +570 case 0x59: +571 case 0x5a: +572 case 0x5b: +573 case 0x5c: +574 case 0x5d: +575 case 0x5e: +576 case 0x5f: { // pop stack into r32 +577 uint8_t reg = op & 0x7; +578 trace(90, "run") << "pop into " << rname(reg) << end(); +579 //? cerr << "pop from " << Reg[ESP].u << '\n'; +580 Reg[reg].u = pop(); +581 //? 
cerr << "=> " << NUM(reg) << ": " << Reg[reg].u << '\n'; +582 break; +583 } +584 :(code) +585 uint32_t pop() { +586 uint32_t result = read_mem_u32(Reg[ESP].u); +587 trace(90, "run") << "popping value 0x" << HEXWORD << result << end(); +588 Reg[ESP].u += 4; +589 trace(90, "run") << "incrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); +590 return result; +591 } diff --git a/html/subx/014indirect_addressing.cc.html b/html/subx/014indirect_addressing.cc.html index 1dc22b7e..b3594524 100644 --- a/html/subx/014indirect_addressing.cc.html +++ b/html/subx/014indirect_addressing.cc.html @@ -82,7 +82,7 @@ if ('onhashchange' in window) { 18 case 0: // indirect addressing 19 switch (rm) { 20 default: // address in register - 21 trace(90, "run") << "effective address is 0x" << std::hex << Reg[rm].u << " (" << rname(rm) << ")" << end(); + 21 trace(90, "run") << "effective address is 0x" << std::hex << Reg[rm].u << " (" << rname(rm) << ")" << end(); 22 addr = Reg[rm].u; 23 break; 24 // End Mod 0 Special-cases(addr) @@ -111,7 +111,7 @@ if ('onhashchange' in window) { 47 case 0x03: { // add r/m32 to r32 48 uint8_t modrm = next(); 49 uint8_t arg1 = (modrm>>3)&0x7; - 50 trace(90, "run") << "add r/m32 to " << rname(arg1) << end(); + 50 trace(90, "run") << "add r/m32 to " << rname(arg1) << end(); 51 const int32_t* arg2 = effective_address(modrm); 52 BINARY_ARITHMETIC_OP(+, Reg[arg1].i, *arg2); 53 break; @@ -154,7 +154,7 @@ if ('onhashchange' in window) { 90 case 0x2b: { // subtract r/m32 from r32 91 uint8_t modrm = next(); 92 uint8_t arg1 = (modrm>>3)&0x7; - 93 trace(90, "run") << "subtract r/m32 from " << rname(arg1) << end(); + 93 trace(90, "run") << "subtract r/m32 from " << rname(arg1) << end(); 94 const int32_t* arg2 = effective_address(modrm); 95 BINARY_ARITHMETIC_OP(-, Reg[arg1].i, *arg2); 96 break; @@ -197,7 +197,7 @@ if ('onhashchange' in window) { 133 case 0x23: { // and r/m32 with r32 134 uint8_t modrm = next(); 135 uint8_t arg1 = (modrm>>3)&0x7; -136 trace(90, "run") 
<< "and r/m32 with " << rname(arg1) << end(); +136 trace(90, "run") << "and r/m32 with " << rname(arg1) << end(); 137 const int32_t* arg2 = effective_address(modrm); 138 BINARY_BITWISE_OP(&, Reg[arg1].u, *arg2); 139 break; @@ -240,7 +240,7 @@ if ('onhashchange' in window) { 176 case 0x0b: { // or r/m32 with r32 177 uint8_t modrm = next(); 178 uint8_t arg1 = (modrm>>3)&0x7; -179 trace(90, "run") << "or r/m32 with " << rname(arg1) << end(); +179 trace(90, "run") << "or r/m32 with " << rname(arg1) << end(); 180 const int32_t* arg2 = effective_address(modrm); 181 BINARY_BITWISE_OP(|, Reg[arg1].u, *arg2); 182 break; @@ -283,7 +283,7 @@ if ('onhashchange' in window) { 219 case 0x33: { // xor r/m32 with r32 220 uint8_t modrm = next(); 221 uint8_t arg1 = (modrm>>3)&0x7; -222 trace(90, "run") << "xor r/m32 with " << rname(arg1) << end(); +222 trace(90, "run") << "xor r/m32 with " << rname(arg1) << end(); 223 const int32_t* arg2 = effective_address(modrm); 224 BINARY_BITWISE_OP(|, Reg[arg1].u, *arg2); 225 break; @@ -367,7 +367,7 @@ if ('onhashchange' in window) { 303 case 0x3b: { // set SF if r32 < r/m32 304 uint8_t modrm = next(); 305 uint8_t reg1 = (modrm>>3)&0x7; -306 trace(90, "run") << "compare r/m32 with " << rname(reg1) << end(); +306 trace(90, "run") << "compare r/m32 with " << rname(reg1) << end(); 307 int32_t arg1 = Reg[reg1].i; 308 int32_t* arg2 = effective_address(modrm); 309 int32_t tmp1 = arg1 - *arg2; @@ -439,7 +439,7 @@ if ('onhashchange' in window) { 375 case 0x8b: { // copy r32 to r/m32 376 uint8_t modrm = next(); 377 uint8_t reg1 = (modrm>>3)&0x7; -378 trace(90, "run") << "copy r/m32 to " << rname(reg1) << end(); +378 trace(90, "run") << "copy r/m32 to " << rname(reg1) << end(); 379 int32_t* arg2 = effective_address(modrm); 380 Reg[reg1].i = *arg2; 381 trace(90, "run") << "storing 0x" << HEXWORD << *arg2 << end(); @@ -469,7 +469,7 @@ if ('onhashchange' in window) { 405 case 0x88: { // copy r/m8 to r8 406 uint8_t modrm = next(); 407 uint8_t reg2 = 
(modrm>>3)&0x7; -408 trace(90, "run") << "copy lowermost byte of " << rname(reg2) << " to r8/m8-at-r32" << end(); +408 trace(90, "run") << "copy lowermost byte of " << rname(reg2) << " to r8/m8-at-r32" << end(); 409 // use unsigned to zero-extend 8-bit value to 32 bits 410 uint8_t* arg1 = reinterpret_cast<uint8_t*>(effective_address(modrm)); 411 *arg1 = Reg[reg2].u; @@ -501,12 +501,12 @@ if ('onhashchange' in window) { 437 case 0x8a: { // copy r/m8 to r8 438 uint8_t modrm = next(); 439 uint8_t reg1 = (modrm>>3)&0x7; -440 trace(90, "run") << "copy r8/m8-at-r32 to lowermost byte of " << rname(reg1) << end(); +440 trace(90, "run") << "copy r8/m8-at-r32 to lowermost byte of " << rname(reg1) << end(); 441 // use unsigned to zero-extend 8-bit value to 32 bits 442 uint8_t* arg2 = reinterpret_cast<uint8_t*>(effective_address(modrm)); 443 trace(90, "run") << "storing 0x" << HEXBYTE << NUM(*arg2) << end(); 444 *reinterpret_cast<uint8_t*>(&Reg[reg1].u) = *arg2; // assumes host is little-endian -445 trace(90, "run") << rname(reg1) << " now contains 0x" << HEXWORD << Reg[reg1].u << end(); +445 trace(90, "run") << rname(reg1) << " now contains 0x" << HEXWORD << Reg[reg1].u << end(); 446 break; 447 } 448 @@ -576,7 +576,7 @@ if ('onhashchange' in window) { 512 # ModR/M in binary: 00 (indirect mode) 000 (pop r/m32) 000 (dest EAX) 513 == 0x10 # data segment 514 30 00 00 00 # 0x30 -515 +run: pop into r/m32 +515 +run: pop into r/m32 516 +run: effective address is 0x60 (EAX) 517 +run: popping value 0x00000030 518 +run: incrementing ESP to 0x00000014 @@ -589,7 +589,7 @@ if ('onhashchange' in window) { 525 case 0: { 526 trace(90, "run") << "pop into r/m32" << end(); 527 int32_t* dest = effective_address(modrm); -528 *dest = pop(); +528 *dest = pop(); 529 break; 530 } 531 } @@ -612,7 +612,7 @@ if ('onhashchange' in window) { 548 549 :(before "End Mod 0 Special-cases(addr)") 550 case 5: // exception: mod 0b00 rm 0b101 => incoming disp32 -551 addr = next32(); +551 addr = next32(); 552 
trace(90, "run") << "effective address is 0x" << std::hex << addr << " (disp32)" << end(); 553 break; 554 @@ -637,7 +637,7 @@ if ('onhashchange' in window) { 573 switch (rm) { 574 default: 575 addr = Reg[rm].u; -576 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); +576 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); 577 break; 578 // End Mod 1 Special-cases(addr) 579 } @@ -682,12 +682,12 @@ if ('onhashchange' in window) { 618 switch (rm) { 619 default: 620 addr = Reg[rm].u; -621 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); +621 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(rm) << ")" << end(); 622 break; 623 // End Mod 2 Special-cases(addr) 624 } 625 if (addr > 0) { -626 addr += next32(); +626 addr += next32(); 627 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding disp32)" << end(); 628 } 629 break; @@ -705,6 +705,29 @@ if ('onhashchange' in window) { 641 +run: effective address is initially 0x61 (EAX) 642 +run: effective address is 0x60 (after adding disp32) 643 +run: storing 0x00000011 +644 +645 //:: lea +646 +647 :(before "End Initialize Op Names(name)") +648 put(name, "8d", "load effective address of memory in rm32 into r32"); +649 +650 :(scenario lea) +651 % Reg[EAX].u = 0x60; +652 == 0x1 +653 # op ModR/M SIB displacement immediate +654 8d 18 +655 # ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX) +656 +run: lea into EBX +657 +run: effective address is 0x60 (EAX) +658 +659 :(before "End Single-Byte Opcodes") +660 case 0x8d: { // lea m32 to r32 +661 uint8_t modrm = next(); +662 uint8_t arg1 = (modrm>>3)&0x7; +663 trace(90, "run") << "lea into " << rname(arg1) << end(); +664 Reg[arg1].u = effective_address_number(modrm); +665 break; +666 } diff --git 
a/html/subx/015immediate_addressing.cc.html b/html/subx/015immediate_addressing.cc.html index bb282579..c94ec957 100644 --- a/html/subx/015immediate_addressing.cc.html +++ b/html/subx/015immediate_addressing.cc.html @@ -84,7 +84,7 @@ if ('onhashchange' in window) { 20 trace(90, "run") << "combine imm32 with r/m32" << end(); 21 uint8_t modrm = next(); 22 int32_t* arg1 = effective_address(modrm); - 23 int32_t arg2 = next32(); + 23 int32_t arg2 = next32(); 24 trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end(); 25 uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits 26 switch (subop) { @@ -131,7 +131,7 @@ if ('onhashchange' in window) { 67 68 :(before "End Single-Byte Opcodes") 69 case 0x2d: { // subtract imm32 from EAX - 70 int32_t arg2 = next32(); + 70 int32_t arg2 = next32(); 71 trace(90, "run") << "subtract imm32 0x" << HEXWORD << arg2 << " from EAX" << end(); 72 BINARY_ARITHMETIC_OP(-, Reg[EAX].i, arg2); 73 break; @@ -189,7 +189,7 @@ if ('onhashchange' in window) { 125 126 :(before "End Single-Byte Opcodes") 127 case 0x25: { // and imm32 with EAX -128 int32_t arg2 = next32(); +128 int32_t arg2 = next32(); 129 trace(90, "run") << "and imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); 130 BINARY_BITWISE_OP(&, Reg[EAX].i, arg2); 131 break; @@ -247,7 +247,7 @@ if ('onhashchange' in window) { 183 184 :(before "End Single-Byte Opcodes") 185 case 0x0d: { // or imm32 with EAX -186 int32_t arg2 = next32(); +186 int32_t arg2 = next32(); 187 trace(90, "run") << "or imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); 188 BINARY_BITWISE_OP(|, Reg[EAX].i, arg2); 189 break; @@ -303,7 +303,7 @@ if ('onhashchange' in window) { 239 240 :(before "End Single-Byte Opcodes") 241 case 0x35: { // xor imm32 with EAX -242 int32_t arg2 = next32(); +242 int32_t arg2 = next32(); 243 trace(90, "run") << "xor imm32 0x" << HEXWORD << arg2 << " with EAX" << end(); 244 BINARY_BITWISE_OP(^, Reg[EAX].i, arg2); 245 break; @@ -360,7 +360,7 @@ if ('onhashchange' in window) { 
296 :(before "End Single-Byte Opcodes") 297 case 0x3d: { // compare EAX with imm32 298 int32_t arg1 = Reg[EAX].i; -299 int32_t arg2 = next32(); +299 int32_t arg2 = next32(); 300 trace(90, "run") << "compare EAX and imm32 0x" << HEXWORD << arg2 << end(); 301 int32_t tmp1 = arg1 - arg2; 302 SF = (tmp1 < 0); @@ -502,8 +502,8 @@ if ('onhashchange' in window) { 438 case 0xbe: 439 case 0xbf: { // copy imm32 to r32 440 uint8_t reg1 = op & 0x7; -441 int32_t arg2 = next32(); -442 trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to " << rname(reg1) << end(); +441 int32_t arg2 = next32(); +442 trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to " << rname(reg1) << end(); 443 Reg[reg1].i = arg2; 444 break; 445 } @@ -528,7 +528,7 @@ if ('onhashchange' in window) { 464 uint8_t modrm = next(); 465 trace(90, "run") << "copy imm32 to r/m32" << end(); 466 int32_t* arg1 = effective_address(modrm); -467 int32_t arg2 = next32(); +467 int32_t arg2 = next32(); 468 trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end(); 469 *arg1 = arg2; 470 break; @@ -550,7 +550,7 @@ if ('onhashchange' in window) { 486 487 :(before "End Single-Byte Opcodes") 488 case 0x68: { -489 uint32_t val = static_cast<uint32_t>(next32()); +489 uint32_t val = static_cast<uint32_t>(next32()); 490 trace(90, "run") << "push imm32 0x" << HEXWORD << val << end(); 491 //? 
cerr << "push: " << val << " => " << Reg[ESP].u << '\n'; 492 push(val); diff --git a/html/subx/016index_addressing.cc.html b/html/subx/016index_addressing.cc.html index 0a52a62d..565b5b96 100644 --- a/html/subx/016index_addressing.cc.html +++ b/html/subx/016index_addressing.cc.html @@ -88,11 +88,11 @@ if ('onhashchange' in window) { 26 uint32_t addr = 0; 27 if (base != EBP || mod != 0) { 28 addr = Reg[base].u; - 29 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(base) << ")" << end(); + 29 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (" << rname(base) << ")" << end(); 30 } 31 else { 32 // base == EBP && mod == 0 - 33 addr = next32(); // ignore base + 33 addr = next32(); // ignore base 34 trace(90, "run") << "effective address is initially 0x" << std::hex << addr << " (disp32)" << end(); 35 } 36 uint8_t index = (sib>>3)&0x7; @@ -103,7 +103,7 @@ if ('onhashchange' in window) { 41 else { 42 uint8_t scale = (1 << (sib>>6)); 43 addr += Reg[index].i*scale; // treat index register as signed. Maybe base as well? But we'll always ensure it's non-negative. - 44 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding " << rname(index) << "*" << NUM(scale) << ")" << end(); + 44 trace(90, "run") << "effective address is 0x" << std::hex << addr << " (after adding " << rname(index) << "*" << NUM(scale) << ")" << end(); 45 } 46 return addr; 47 } diff --git a/html/subx/019functions.cc.html b/html/subx/019functions.cc.html index a0845071..5aa8a9e7 100644 --- a/html/subx/019functions.cc.html +++ b/html/subx/019functions.cc.html @@ -79,7 +79,7 @@ if ('onhashchange' in window) { 16 17 :(before "End Single-Byte Opcodes") 18 case 0xe8: { // call disp32 relative to next EIP -19 int32_t offset = next32(); +19 int32_t offset = next32(); 20 trace(90, "run") << "call imm32 0x" << HEXWORD << offset << end(); 21 //? 
cerr << "push: EIP: " << EIP << " => " << Reg[ESP].u << '\n'; 22 push(EIP); @@ -147,7 +147,7 @@ if ('onhashchange' in window) { 84 :(before "End Single-Byte Opcodes") 85 case 0xc3: { // return from a call 86 trace(90, "run") << "return" << end(); -87 EIP = pop(); +87 EIP = pop(); 88 trace(90, "run") << "jumping to 0x" << HEXWORD << EIP << end(); 89 break; 90 } diff --git a/html/subx/028translate.cc.html b/html/subx/028translate.cc.html index 1f60589f..857c1b77 100644 --- a/html/subx/028translate.cc.html +++ b/html/subx/028translate.cc.html @@ -89,7 +89,7 @@ if ('onhashchange' in window) { 29 } 30 parse(fin, p); 31 if (trace_contains_errors()) return 1; - 32 transform(p); + 32 transform(p); 33 if (trace_contains_errors()) return 1; 34 save_elf(p, argv[3]); 35 if (trace_contains_errors()) unlink(argv[3]); @@ -204,7 +204,7 @@ if ('onhashchange' in window) { 144 for (int i = 0; i < SIZE(s.lines); ++i) { 145 const vector<word>& w = s.lines.at(i).words; 146 for (int j = 0; j < SIZE(w); ++j) { -147 uint8_t x = hex_byte(w.at(j).data); // we're done with metadata by this point +147 uint8_t x = hex_byte(w.at(j).data); // we're done with metadata by this point 148 out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1); 149 } 150 } diff --git a/html/subx/030---operands.cc.html b/html/subx/030---operands.cc.html index e97d633f..0cd8748e 100644 --- a/html/subx/030---operands.cc.html +++ b/html/subx/030---operands.cc.html @@ -80,7 +80,7 @@ if ('onhashchange' in window) { 19 "Each operand has a type. An instruction won't have more than one operand of\n" 20 "any type.\n" 21 "Each instruction has some set of allowed operand types. It'll reject others.\n" - 22 "The complete list of operand types: mod, subop, r32 (register), rm32\n" + 22 "The complete list of operand types: mod, subop, r32 (register), rm32\n" 23 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n" 24 "imm32.\n" 25 "Each of these has its own help page. 
Try reading 'subx help mod' next.\n" @@ -198,7 +198,7 @@ if ('onhashchange' in window) { 137 138 :(after "Begin Transforms") 139 // Begin Level-2 Transforms -140 Transform.push_back(pack_operands); +140 Transform.push_back(pack_operands); 141 // End Level-2 Transforms 142 143 :(code) @@ -242,19 +242,19 @@ if ('onhashchange' in window) { 181 for (int i = 0; i < SIZE(in.words); ++i) { 182 const word& curr = in.words.at(i); 183 if (has_operand_metadata(curr, "mod")) { -184 mod = hex_byte(curr.data); +184 mod = hex_byte(curr.data); 185 emit = true; 186 } 187 else if (has_operand_metadata(curr, "rm32")) { -188 rm32 = hex_byte(curr.data); +188 rm32 = hex_byte(curr.data); 189 emit = true; 190 } 191 else if (has_operand_metadata(curr, "r32")) { -192 reg_subop = hex_byte(curr.data); +192 reg_subop = hex_byte(curr.data); 193 emit = true; 194 } 195 else if (has_operand_metadata(curr, "subop")) { -196 reg_subop = hex_byte(curr.data); +196 reg_subop = hex_byte(curr.data); 197 emit = true; 198 } 199 } @@ -268,15 +268,15 @@ if ('onhashchange' in window) { 207 for (int i = 0; i < SIZE(in.words); ++i) { 208 const word& curr = in.words.at(i); 209 if (has_operand_metadata(curr, "scale")) { -210 scale = hex_byte(curr.data); +210 scale = hex_byte(curr.data); 211 emit = true; 212 } 213 else if (has_operand_metadata(curr, "index")) { -214 index = hex_byte(curr.data); +214 index = hex_byte(curr.data); 215 emit = true; 216 } 217 else if (has_operand_metadata(curr, "base")) { -218 base = hex_byte(curr.data); +218 base = hex_byte(curr.data); 219 emit = true; 220 } 221 } @@ -311,10 +311,10 @@ if ('onhashchange' in window) { 250 if (num == 1 || !is_hex_int(w.data)) { 251 out.words.push_back(w); 252 if (is_hex_int(w.data)) -253 out.words.back().data = hex_byte_to_string(parse_int(w.data)); +253 out.words.back().data = hex_byte_to_string(parse_int(w.data)); 254 return; 255 } -256 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num); +256 emit_hex_bytes(out, 
static_cast<uint32_t>(parse_int(w.data)), num); 257 } 258 259 void emit_hex_bytes(line& out, uint32_t val, int num) { @@ -376,12 +376,12 @@ if ('onhashchange' in window) { 315 316 //: helper for scenario 317 :(code) -318 void transform(const string& text_bytes) { +318 void transform(const string& text_bytes) { 319 program p; 320 istringstream in(text_bytes); 321 parse(in, p); 322 if (trace_contains_errors()) return; -323 transform(p); +323 transform(p); 324 } 325 326 :(scenario pack_modrm_imm32) diff --git a/html/subx/031check_operands.cc.html b/html/subx/031check_operands.cc.html index ce640e7e..69a00ea0 100644 --- a/html/subx/031check_operands.cc.html +++ b/html/subx/031check_operands.cc.html @@ -90,18 +90,18 @@ if ('onhashchange' in window) { 26 void check_operands(const line& inst) { 27 word op = preprocess_op(inst.words.at(0)); 28 if (op.data == "0f") { - 29 check_operands_0f(inst); + 29 check_operands_0f(inst); 30 return; 31 } 32 if (op.data == "f3") { - 33 check_operands_f3(inst); + 33 check_operands_f3(inst); 34 return; 35 } 36 check_operands(inst, op); 37 } 38 39 word preprocess_op(word/*copy*/ op) { - 40 op.data = tolower(op.data.c_str()); + 40 op.data = tolower(op.data.c_str()); 41 // opcodes can't be negative 42 if (starts_with(op.data, "0x")) 43 op.data = op.data.substr(2); @@ -278,329 +278,331 @@ if ('onhashchange' in window) { 214 put(Permitted_operands, "8b", 0x01); 215 // swap 216 put(Permitted_operands, "87", 0x01); -217 // pop -218 put(Permitted_operands, "8f", 0x01); -219 -220 //// Class O: op, ModR/M and subop (not r32) -221 // imm32 imm8 disp32 |disp16 disp8 subop modrm -222 // 0 0 0 |0 0 1 1 -223 put(Permitted_operands, "f7", 0x03); // test/not/mul/div -224 put(Permitted_operands, "ff", 0x03); // jump/push/call -225 -226 //// Class N: op, ModR/M and imm32 -227 // imm32 imm8 disp32 |disp16 disp8 subop modrm -228 // 1 0 0 |0 0 0 1 -229 put(Permitted_operands, "c7", 0x41); // copy -230 -231 //// Class P: op, ModR/M, subop (not r32) and imm32 
-232 // imm32 imm8 disp32 |disp16 disp8 subop modrm -233 // 1 0 0 |0 0 1 1 -234 put(Permitted_operands, "81", 0x43); // combine -235 -236 // End Init Permitted Operands -237 } -238 -239 :(code) -240 #define HAS(bitvector, bit) ((bitvector) & (1 << (bit))) -241 #define SET(bitvector, bit) ((bitvector) | (1 << (bit))) -242 #define CLEAR(bitvector, bit) ((bitvector) & (~(1 << (bit)))) -243 -244 void check_operands(const line& inst, const word& op) { -245 if (!is_hex_byte(op)) return; -246 uint8_t expected_bitvector = get(Permitted_operands, op.data); -247 if (HAS(expected_bitvector, MODRM)) { -248 check_operands_modrm(inst, op); -249 compare_bitvector_modrm(inst, expected_bitvector, op); -250 } -251 else { -252 compare_bitvector(inst, expected_bitvector, op); -253 } -254 } -255 -256 //: Many instructions can be checked just by comparing bitvectors. +217 // lea +218 put(Permitted_operands, "8d", 0x01); +219 // pop +220 put(Permitted_operands, "8f", 0x01); +221 +222 //// Class O: op, ModR/M and subop (not r32) +223 // imm32 imm8 disp32 |disp16 disp8 subop modrm +224 // 0 0 0 |0 0 1 1 +225 put(Permitted_operands, "f7", 0x03); // test/not/mul/div +226 put(Permitted_operands, "ff", 0x03); // jump/push/call +227 +228 //// Class N: op, ModR/M and imm32 +229 // imm32 imm8 disp32 |disp16 disp8 subop modrm +230 // 1 0 0 |0 0 0 1 +231 put(Permitted_operands, "c7", 0x41); // copy +232 +233 //// Class P: op, ModR/M, subop (not r32) and imm32 +234 // imm32 imm8 disp32 |disp16 disp8 subop modrm +235 // 1 0 0 |0 0 1 1 +236 put(Permitted_operands, "81", 0x43); // combine +237 +238 // End Init Permitted Operands +239 } +240 +241 :(code) +242 #define HAS(bitvector, bit) ((bitvector) & (1 << (bit))) +243 #define SET(bitvector, bit) ((bitvector) | (1 << (bit))) +244 #define CLEAR(bitvector, bit) ((bitvector) & (~(1 << (bit)))) +245 +246 void check_operands(const line& inst, const word& op) { +247 if (!is_hex_byte(op)) return; +248 uint8_t expected_bitvector = get(Permitted_operands, 
op.data); +249 if (HAS(expected_bitvector, MODRM)) { +250 check_operands_modrm(inst, op); +251 compare_bitvector_modrm(inst, expected_bitvector, op); +252 } +253 else { +254 compare_bitvector(inst, expected_bitvector, op); +255 } +256 } 257 -258 void compare_bitvector(const line& inst, uint8_t expected, const word& op) { -259 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere -260 uint8_t bitvector = compute_operand_bitvector(inst); -261 if (trace_contains_errors()) return; // duplicate operand type -262 if (bitvector == expected) return; // all good with this instruction -263 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) { -264 //? cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n'; -265 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand -266 const string& optype = Operand_type_name.at(i); -267 if ((bitvector & 0x1) > (expected & 0x1)) -268 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end(); -269 else -270 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); -271 // continue giving all errors for a single instruction -272 } -273 // ignore settings in any unused bits -274 } -275 -276 string maybe_name(const word& op) { -277 if (!is_hex_byte(op)) return ""; -278 if (!contains_key(name, op.data)) return ""; -279 return " ("+get(name, op.data)+')'; -280 } -281 -282 uint32_t compute_operand_bitvector(const line& inst) { -283 uint32_t bitvector = 0; -284 for (int i = /*skip op*/1; i < SIZE(inst.words); ++i) { -285 bitvector = bitvector | bitvector_for_operand(inst.words.at(i)); -286 if (trace_contains_errors()) return INVALID_OPERANDS; // duplicate operand type -287 } -288 return bitvector; -289 } -290 -291 bool has_operands(const line& inst) { -292 return SIZE(inst.words) > 
first_operand(inst); -293 } -294 -295 int first_operand(const line& inst) { -296 if (inst.words.at(0).data == "0f") return 2; -297 if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") { -298 if (inst.words.at(1).data == "0f") -299 return 3; -300 else -301 return 2; -302 } -303 return 1; -304 } -305 -306 // Scan the metadata of 'w' and return the bit corresponding to any operand type. -307 // Also raise an error if metadata contains multiple operand types. -308 uint32_t bitvector_for_operand(const word& w) { -309 uint32_t bv = 0; -310 bool found = false; -311 for (int i = 0; i < SIZE(w.metadata); ++i) { -312 const string& curr = w.metadata.at(i); -313 if (!contains_key(Operand_type, curr)) continue; // ignore unrecognized metadata -314 if (found) { -315 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); -316 return INVALID_OPERANDS; -317 } -318 bv = (1 << get(Operand_type, curr)); -319 found = true; -320 } -321 return bv; -322 } -323 -324 :(scenario conflicting_operand_type) -325 % Hide_errors = true; -326 == 0x1 -327 cd/software-interrupt 80/imm8/imm32 -328 +error: '80/imm8/imm32' has conflicting operand types; it should have only one -329 -330 //: Instructions computing effective addresses have more complex rules, so -331 //: we'll hard-code a common set of instruction-decoding rules. 
-332 -333 :(scenario check_missing_mod_operand) -334 % Hide_errors = true; -335 == 0x1 -336 81 0/add/subop 3/rm32/ebx 1/imm32 -337 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand -338 -339 :(code) -340 void check_operands_modrm(const line& inst, const word& op) { -341 if (all_hex_bytes(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere -342 check_operand_metadata_present(inst, "mod", op); -343 check_operand_metadata_present(inst, "rm32", op); -344 // no check for r32; some instructions don't use it; just assume it's 0 if missing -345 if (op.data == "81" || op.data == "8f" || op.data == "ff") { // keep sync'd with 'help subop' -346 check_operand_metadata_present(inst, "subop", op); -347 check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop"); -348 } -349 if (trace_contains_errors()) return; -350 if (metadata(inst, "rm32").data != "4") return; -351 // SIB byte checks -352 uint8_t mod = hex_byte(metadata(inst, "mod").data); -353 if (mod != /*direct*/3) { -354 check_operand_metadata_present(inst, "base", op); -355 check_operand_metadata_present(inst, "index", op); // otherwise why go to SIB? 
-356 } -357 else { -358 check_operand_metadata_absent(inst, "base", op, "direct mode"); -359 check_operand_metadata_absent(inst, "index", op, "direct mode"); -360 } -361 // no check for scale; 0 (2**0 = 1) by default -362 } -363 -364 // same as compare_bitvector, with a couple of exceptions for modrm-based instructions -365 // exception 1: ignore modrm bit since we already checked it above -366 // exception 2: modrm instructions can use a displacement on occasion -367 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) { -368 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere -369 uint8_t bitvector = compute_operand_bitvector(inst); -370 if (trace_contains_errors()) return; // duplicate operand type -371 expected = CLEAR(expected, MODRM); // exception 1 -372 if (bitvector == expected) return; // all good with this instruction -373 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) { -374 //? 
cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n'; -375 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand -376 if (i == DISP8 || i == DISP32) continue; // exception 2 -377 const string& optype = Operand_type_name.at(i); -378 if ((bitvector & 0x1) > (expected & 0x1)) -379 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end(); -380 else -381 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); -382 // continue giving all errors for a single instruction -383 } -384 // ignore settings in any unused bits -385 } -386 -387 void check_operand_metadata_present(const line& inst, const string& type, const word& op) { -388 if (!has_operand_metadata(inst, type)) -389 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end(); -390 } -391 -392 void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) { -393 if (has_operand_metadata(inst, type)) -394 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end(); -395 } -396 -397 :(scenarios transform) -398 :(scenario modrm_with_displacement) -399 % Reg[EAX].u = 0x1; -400 == 0x1 -401 # just avoid null pointer -402 8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8 # copy *(EAX+4) to EDX -403 $error: 0 -404 :(scenarios run) -405 -406 :(scenario conflicting_operands_in_modrm_instruction) -407 % Hide_errors = true; -408 == 0x1 -409 01/add 0/mod 3/mod -410 +error: '01/add 0/mod 3/mod' has conflicting mod operands -411 -412 :(scenario conflicting_operand_type_modrm) -413 % Hide_errors = true; -414 == 0x1 -415 01/add 0/mod 3/rm32/r32 -416 +error: '3/rm32/r32' has conflicting operand types; it should have only one -417 -418 :(scenario check_missing_rm32_operand) -419 % 
Hide_errors = true; -420 == 0x1 -421 81 0/add/subop 0/mod 1/imm32 -422 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand -423 -424 :(scenario check_missing_subop_operand) -425 % Hide_errors = true; -426 == 0x1 -427 81 0/mod 3/rm32/ebx 1/imm32 -428 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand -429 -430 :(scenario check_missing_base_operand) -431 % Hide_errors = true; -432 == 0x1 -433 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32 -434 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand -435 -436 :(scenario check_missing_index_operand) -437 % Hide_errors = true; -438 == 0x1 -439 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32 -440 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand -441 -442 :(scenario check_missing_base_operand_2) -443 % Hide_errors = true; -444 == 0x1 -445 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32 -446 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand -447 -448 :(scenario check_base_operand_not_needed_in_direct_mode) -449 == 0x1 -450 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32 -451 $error: 0 -452 -453 //:: similarly handle multi-byte opcodes +258 //: Many instructions can be checked just by comparing bitvectors. 
+259 +260 void compare_bitvector(const line& inst, uint8_t expected, const word& op) { +261 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere +262 uint8_t bitvector = compute_operand_bitvector(inst); +263 if (trace_contains_errors()) return; // duplicate operand type +264 if (bitvector == expected) return; // all good with this instruction +265 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) { +266 //? cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n'; +267 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand +268 const string& optype = Operand_type_name.at(i); +269 if ((bitvector & 0x1) > (expected & 0x1)) +270 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end(); +271 else +272 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); +273 // continue giving all errors for a single instruction +274 } +275 // ignore settings in any unused bits +276 } +277 +278 string maybe_name(const word& op) { +279 if (!is_hex_byte(op)) return ""; +280 if (!contains_key(name, op.data)) return ""; +281 return " ("+get(name, op.data)+')'; +282 } +283 +284 uint32_t compute_operand_bitvector(const line& inst) { +285 uint32_t bitvector = 0; +286 for (int i = /*skip op*/1; i < SIZE(inst.words); ++i) { +287 bitvector = bitvector | bitvector_for_operand(inst.words.at(i)); +288 if (trace_contains_errors()) return INVALID_OPERANDS; // duplicate operand type +289 } +290 return bitvector; +291 } +292 +293 bool has_operands(const line& inst) { +294 return SIZE(inst.words) > first_operand(inst); +295 } +296 +297 int first_operand(const line& inst) { +298 if (inst.words.at(0).data == "0f") return 2; +299 if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") { +300 if (inst.words.at(1).data == "0f") +301 
return 3; +302 else +303 return 2; +304 } +305 return 1; +306 } +307 +308 // Scan the metadata of 'w' and return the bit corresponding to any operand type. +309 // Also raise an error if metadata contains multiple operand types. +310 uint32_t bitvector_for_operand(const word& w) { +311 uint32_t bv = 0; +312 bool found = false; +313 for (int i = 0; i < SIZE(w.metadata); ++i) { +314 const string& curr = w.metadata.at(i); +315 if (!contains_key(Operand_type, curr)) continue; // ignore unrecognized metadata +316 if (found) { +317 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); +318 return INVALID_OPERANDS; +319 } +320 bv = (1 << get(Operand_type, curr)); +321 found = true; +322 } +323 return bv; +324 } +325 +326 :(scenario conflicting_operand_type) +327 % Hide_errors = true; +328 == 0x1 +329 cd/software-interrupt 80/imm8/imm32 +330 +error: '80/imm8/imm32' has conflicting operand types; it should have only one +331 +332 //: Instructions computing effective addresses have more complex rules, so +333 //: we'll hard-code a common set of instruction-decoding rules. 
+334 +335 :(scenario check_missing_mod_operand) +336 % Hide_errors = true; +337 == 0x1 +338 81 0/add/subop 3/rm32/ebx 1/imm32 +339 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand +340 +341 :(code) +342 void check_operands_modrm(const line& inst, const word& op) { +343 if (all_hex_bytes(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere +344 check_operand_metadata_present(inst, "mod", op); +345 check_operand_metadata_present(inst, "rm32", op); +346 // no check for r32; some instructions don't use it; just assume it's 0 if missing +347 if (op.data == "81" || op.data == "8f" || op.data == "ff") { // keep sync'd with 'help subop' +348 check_operand_metadata_present(inst, "subop", op); +349 check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop"); +350 } +351 if (trace_contains_errors()) return; +352 if (metadata(inst, "rm32").data != "4") return; +353 // SIB byte checks +354 uint8_t mod = hex_byte(metadata(inst, "mod").data); +355 if (mod != /*direct*/3) { +356 check_operand_metadata_present(inst, "base", op); +357 check_operand_metadata_present(inst, "index", op); // otherwise why go to SIB? 
+358 } +359 else { +360 check_operand_metadata_absent(inst, "base", op, "direct mode"); +361 check_operand_metadata_absent(inst, "index", op, "direct mode"); +362 } +363 // no check for scale; 0 (2**0 = 1) by default +364 } +365 +366 // same as compare_bitvector, with a couple of exceptions for modrm-based instructions +367 // exception 1: ignore modrm bit since we already checked it above +368 // exception 2: modrm instructions can use a displacement on occasion +369 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) { +370 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere +371 uint8_t bitvector = compute_operand_bitvector(inst); +372 if (trace_contains_errors()) return; // duplicate operand type +373 expected = CLEAR(expected, MODRM); // exception 1 +374 if (bitvector == expected) return; // all good with this instruction +375 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) { +376 //? 
cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n'; +377 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand +378 if (i == DISP8 || i == DISP32) continue; // exception 2 +379 const string& optype = Operand_type_name.at(i); +380 if ((bitvector & 0x1) > (expected & 0x1)) +381 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end(); +382 else +383 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); +384 // continue giving all errors for a single instruction +385 } +386 // ignore settings in any unused bits +387 } +388 +389 void check_operand_metadata_present(const line& inst, const string& type, const word& op) { +390 if (!has_operand_metadata(inst, type)) +391 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end(); +392 } +393 +394 void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) { +395 if (has_operand_metadata(inst, type)) +396 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end(); +397 } +398 +399 :(scenarios transform) +400 :(scenario modrm_with_displacement) +401 % Reg[EAX].u = 0x1; +402 == 0x1 +403 # just avoid null pointer +404 8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8 # copy *(EAX+4) to EDX +405 $error: 0 +406 :(scenarios run) +407 +408 :(scenario conflicting_operands_in_modrm_instruction) +409 % Hide_errors = true; +410 == 0x1 +411 01/add 0/mod 3/mod +412 +error: '01/add 0/mod 3/mod' has conflicting mod operands +413 +414 :(scenario conflicting_operand_type_modrm) +415 % Hide_errors = true; +416 == 0x1 +417 01/add 0/mod 3/rm32/r32 +418 +error: '3/rm32/r32' has conflicting operand types; it should have only one +419 +420 :(scenario check_missing_rm32_operand) +421 % 
Hide_errors = true; +422 == 0x1 +423 81 0/add/subop 0/mod 1/imm32 +424 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand +425 +426 :(scenario check_missing_subop_operand) +427 % Hide_errors = true; +428 == 0x1 +429 81 0/mod 3/rm32/ebx 1/imm32 +430 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand +431 +432 :(scenario check_missing_base_operand) +433 % Hide_errors = true; +434 == 0x1 +435 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32 +436 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand +437 +438 :(scenario check_missing_index_operand) +439 % Hide_errors = true; +440 == 0x1 +441 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32 +442 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand +443 +444 :(scenario check_missing_base_operand_2) +445 % Hide_errors = true; +446 == 0x1 +447 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32 +448 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand +449 +450 :(scenario check_base_operand_not_needed_in_direct_mode) +451 == 0x1 +452 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32 +453 $error: 0 454 -455 :(code) -456 void check_operands_0f(const line& inst) { -457 assert(inst.words.at(0).data == "0f"); -458 if (SIZE(inst.words) == 1) { -459 raise << "opcode '0f' requires a second opcode\n" << end(); -460 return; -461 } -462 word op = preprocess_op(inst.words.at(1)); -463 if (!contains_key(name_0f, op.data)) { -464 raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end(); -465 return; -466 } -467 check_operands_0f(inst, op); -468 } -469 -470 void check_operands_f3(const line& /*unused*/) { -471 raise << "no supported opcodes starting with 
f3\n" << end(); -472 } -473 -474 :(scenario check_missing_disp16_operand) -475 % Hide_errors = true; -476 == 0x1 -477 # instruction effective address operand displacement immediate -478 # op subop mod rm32 base index scale r32 -479 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -480 0f 84 # jmp if ZF to ?? -481 +error: '0f 84' (jump disp16 bytes away if ZF is set): missing disp16 operand -482 -483 :(before "End Globals") -484 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f; -485 :(before "End Init Permitted Operands") -486 //// Class C: just op and disp16 -487 // imm32 imm8 disp32 |disp16 disp8 subop modrm -488 // 0 0 0 |1 0 0 0 -489 put(Permitted_operands_0f, "84", 0x08); -490 put(Permitted_operands_0f, "85", 0x08); -491 put(Permitted_operands_0f, "8c", 0x08); -492 put(Permitted_operands_0f, "8d", 0x08); -493 put(Permitted_operands_0f, "8e", 0x08); -494 put(Permitted_operands_0f, "8f", 0x08); -495 -496 //// Class M: using ModR/M byte -497 // imm32 imm8 disp32 |disp16 disp8 subop modrm -498 // 0 0 0 |0 0 0 1 -499 put(Permitted_operands_0f, "af", 0x01); -500 -501 :(code) -502 void check_operands_0f(const line& inst, const word& op) { -503 uint8_t expected_bitvector = get(Permitted_operands_0f, op.data); -504 if (HAS(expected_bitvector, MODRM)) -505 check_operands_modrm(inst, op); -506 compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op); -507 } -508 -509 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) { -510 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere -511 uint8_t bitvector = compute_operand_bitvector(inst); -512 if (trace_contains_errors()) return; // duplicate operand type -513 if (bitvector == expected) return; // all good with this instruction -514 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) { -515 //? 
cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n'; -516 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand -517 const string& optype = Operand_type_name.at(i); -518 if ((bitvector & 0x1) > (expected & 0x1)) -519 raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end(); -520 else -521 raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end(); -522 // continue giving all errors for a single instruction -523 } -524 // ignore settings in any unused bits -525 } -526 -527 string tolower(const char* s) { -528 ostringstream out; -529 for (/*nada*/; *s; ++s) -530 out << static_cast<char>(tolower(*s)); -531 return out.str(); -532 } -533 -534 #undef HAS -535 #undef SET -536 #undef CLEAR -537 -538 :(before "End Includes") -539 #include<cctype> +455 //:: similarly handle multi-byte opcodes +456 +457 :(code) +458 void check_operands_0f(const line& inst) { +459 assert(inst.words.at(0).data == "0f"); +460 if (SIZE(inst.words) == 1) { +461 raise << "opcode '0f' requires a second opcode\n" << end(); +462 return; +463 } +464 word op = preprocess_op(inst.words.at(1)); +465 if (!contains_key(name_0f, op.data)) { +466 raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end(); +467 return; +468 } +469 check_operands_0f(inst, op); +470 } +471 +472 void check_operands_f3(const line& /*unused*/) { +473 raise << "no supported opcodes starting with f3\n" << end(); +474 } +475 +476 :(scenario check_missing_disp16_operand) +477 % Hide_errors = true; +478 == 0x1 +479 # instruction effective address operand displacement immediate +480 # op subop mod rm32 base index scale r32 +481 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +482 0f 84 # jmp if ZF to ?? 
+483 +error: '0f 84' (jump disp16 bytes away if ZF is set): missing disp16 operand +484 +485 :(before "End Globals") +486 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f; +487 :(before "End Init Permitted Operands") +488 //// Class C: just op and disp16 +489 // imm32 imm8 disp32 |disp16 disp8 subop modrm +490 // 0 0 0 |1 0 0 0 +491 put(Permitted_operands_0f, "84", 0x08); +492 put(Permitted_operands_0f, "85", 0x08); +493 put(Permitted_operands_0f, "8c", 0x08); +494 put(Permitted_operands_0f, "8d", 0x08); +495 put(Permitted_operands_0f, "8e", 0x08); +496 put(Permitted_operands_0f, "8f", 0x08); +497 +498 //// Class M: using ModR/M byte +499 // imm32 imm8 disp32 |disp16 disp8 subop modrm +500 // 0 0 0 |0 0 0 1 +501 put(Permitted_operands_0f, "af", 0x01); +502 +503 :(code) +504 void check_operands_0f(const line& inst, const word& op) { +505 uint8_t expected_bitvector = get(Permitted_operands_0f, op.data); +506 if (HAS(expected_bitvector, MODRM)) +507 check_operands_modrm(inst, op); +508 compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op); +509 } +510 +511 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) { +512 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere +513 uint8_t bitvector = compute_operand_bitvector(inst); +514 if (trace_contains_errors()) return; // duplicate operand type +515 if (bitvector == expected) return; // all good with this instruction +516 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) { +517 //? 
cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n'; +518 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand +519 const string& optype = Operand_type_name.at(i); +520 if ((bitvector & 0x1) > (expected & 0x1)) +521 raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end(); +522 else +523 raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end(); +524 // continue giving all errors for a single instruction +525 } +526 // ignore settings in any unused bits +527 } +528 +529 string tolower(const char* s) { +530 ostringstream out; +531 for (/*nada*/; *s; ++s) +532 out << static_cast<char>(tolower(*s)); +533 return out.str(); +534 } +535 +536 #undef HAS +537 #undef SET +538 #undef CLEAR +539 +540 :(before "End Includes") +541 #include<cctype> diff --git a/html/subx/032check_operand_bounds.cc.html b/html/subx/032check_operand_bounds.cc.html index 12324a67..66ea64bb 100644 --- a/html/subx/032check_operand_bounds.cc.html +++ b/html/subx/032check_operand_bounds.cc.html @@ -93,7 +93,7 @@ if ('onhashchange' in window) { 30 trace(99, "transform") << "-- check operand bounds" << end(); 31 for (int i = 0; i < SIZE(code.lines); ++i) { 32 const line& inst = code.lines.at(i); -33 for (int j = first_operand(inst); j < SIZE(inst.words); ++j) +33 for (int j = first_operand(inst); j < SIZE(inst.words); ++j) 34 check_operand_bounds(inst.words.at(j)); 35 if (trace_contains_errors()) return; // stop at the first mal-formed instruction 36 } @@ -103,7 +103,7 @@ if ('onhashchange' in window) { 40 for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) { 41 if (!has_operand_metadata(w, p->first)) continue; 42 if (!is_hex_int(w.data)) continue; // later transforms are on their own to do their own bounds checking -43 int32_t x = parse_int(w.data); +43 int32_t x = 
parse_int(w.data); 44 if (x >= 0) { 45 if (static_cast<uint32_t>(x) >= p->second) 46 raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end(); diff --git a/html/subx/034compute_segment_address.cc.html b/html/subx/034compute_segment_address.cc.html index 886f0f34..2e73693e 100644 --- a/html/subx/034compute_segment_address.cc.html +++ b/html/subx/034compute_segment_address.cc.html @@ -78,7 +78,7 @@ if ('onhashchange' in window) { 17 +run: storing 0x0d0c0b0a 18 19 :(before "End Level-2 Transforms") -20 Transform.push_back(compute_segment_starts); +20 Transform.push_back(compute_segment_starts); 21 22 :(code) 23 void compute_segment_starts(program& p) { diff --git a/html/subx/035labels.cc.html b/html/subx/035labels.cc.html index 182f1d27..de507cf6 100644 --- a/html/subx/035labels.cc.html +++ b/html/subx/035labels.cc.html @@ -105,10 +105,10 @@ if ('onhashchange' in window) { 43 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes 44 loop: 45 05 0x0d0c0b0a/imm32 # add to EAX - 46 +transform: label 'loop' is at address 1 + 46 +transform: label 'loop' is at address 1 47 48 :(before "End Level-2 Transforms") - 49 Transform.push_back(rewrite_labels); + 49 Transform.push_back(rewrite_labels); 50 :(code) 51 void rewrite_labels(program& p) { 52 trace(99, "transform") << "-- rewrite labels" << end(); @@ -117,9 +117,9 @@ if ('onhashchange' in window) { 55 map<string, int32_t> byte_index; // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits 56 compute_byte_indices_for_labels(code, byte_index); 57 if (trace_contains_errors()) return; - 58 drop_labels(code); + 58 drop_labels(code); 59 if (trace_contains_errors()) return; - 60 replace_labels_with_displacements(code, byte_index); + 60 replace_labels_with_displacements(code, byte_index); 61 } 62 63 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) { @@ -143,158 +143,168 @@ if 
('onhashchange' in window) { 81 ++current_byte; 82 } 83 else { - 84 string label = drop_last(curr.data); + 84 string label = drop_last(curr.data); 85 // ensure labels look sufficiently different from raw hex - 86 check_valid_name(label); + 86 check_valid_name(label); 87 if (trace_contains_errors()) return; 88 if (contains_any_operand_metadata(curr)) - 89 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end(); + 89 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end(); 90 if (j > 0) 91 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end(); - 92 put(byte_index, label, current_byte); - 93 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end(); - 94 // no modifying current_byte; label definitions won't be in the final binary - 95 } - 96 } - 97 } - 98 } - 99 -100 void drop_labels(segment& code) { -101 for (int i = 0; i < SIZE(code.lines); ++i) { -102 line& inst = code.lines.at(i); -103 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label); -104 inst.words.erase(new_end, inst.words.end()); -105 } -106 } -107 -108 bool is_label(const word& w) { -109 return *w.data.rbegin() == ':'; -110 } -111 -112 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) { -113 int32_t byte_index_next_instruction_starts_at = 0; -114 for (int i = 0; i < SIZE(code.lines); ++i) { -115 line& inst = code.lines.at(i); -116 byte_index_next_instruction_starts_at += num_bytes(inst); -117 line new_inst; -118 for (int j = 0; j < SIZE(inst.words); ++j) { -119 const word& curr = inst.words.at(j); -120 if (contains_key(byte_index, curr.data)) { -121 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at; -122 if (has_operand_metadata(curr, "disp8")) { -123 if (displacement > 0xff || displacement < 
-0x7f) -124 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end(); -125 else -126 emit_hex_bytes(new_inst, displacement, 1); -127 } -128 else if (has_operand_metadata(curr, "disp16")) { -129 if (displacement > 0xffff || displacement < -0x7fff) -130 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end(); -131 else -132 emit_hex_bytes(new_inst, displacement, 2); -133 } -134 else if (has_operand_metadata(curr, "disp32")) { -135 emit_hex_bytes(new_inst, displacement, 4); -136 } -137 } -138 else { -139 new_inst.words.push_back(curr); -140 } -141 } -142 inst.words.swap(new_inst.words); -143 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); -144 } -145 } -146 -147 string data_to_string(const line& inst) { -148 ostringstream out; -149 for (int i = 0; i < SIZE(inst.words); ++i) { -150 if (i > 0) out << ' '; -151 out << inst.words.at(i).data; -152 } -153 return out.str(); -154 } -155 -156 string drop_last(const string& s) { -157 return string(s.begin(), --s.end()); -158 } -159 -160 //: Label definitions must be the first word on a line. No jumping inside -161 //: instructions. -162 //: They should also be the only word on a line. -163 //: However, you can absolutely have multiple labels map to the same address, -164 //: as long as they're on separate lines. 
+ 92 if (Dump_map) + 93 cerr << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n'; + 94 put(byte_index, label, current_byte); + 95 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end(); + 96 // no modifying current_byte; label definitions won't be in the final binary + 97 } + 98 } + 99 } +100 } +101 +102 :(before "End Globals") +103 bool Dump_map = false; // currently used only by 'subx translate' +104 :(before "End Commandline Options") +105 else if (is_equal(*arg, "--map")) { +106 Dump_map = true; +107 } +108 +109 :(code) +110 void drop_labels(segment& code) { +111 for (int i = 0; i < SIZE(code.lines); ++i) { +112 line& inst = code.lines.at(i); +113 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label); +114 inst.words.erase(new_end, inst.words.end()); +115 } +116 } +117 +118 bool is_label(const word& w) { +119 return *w.data.rbegin() == ':'; +120 } +121 +122 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) { +123 int32_t byte_index_next_instruction_starts_at = 0; +124 for (int i = 0; i < SIZE(code.lines); ++i) { +125 line& inst = code.lines.at(i); +126 byte_index_next_instruction_starts_at += num_bytes(inst); +127 line new_inst; +128 for (int j = 0; j < SIZE(inst.words); ++j) { +129 const word& curr = inst.words.at(j); +130 if (contains_key(byte_index, curr.data)) { +131 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at; +132 if (has_operand_metadata(curr, "disp8")) { +133 if (displacement > 0xff || displacement < -0x7f) +134 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end(); +135 else +136 emit_hex_bytes(new_inst, displacement, 1); +137 } +138 else if (has_operand_metadata(curr, "disp16")) { +139 if (displacement > 0xffff || displacement < -0x7fff) +140 raise << 
"'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end(); +141 else +142 emit_hex_bytes(new_inst, displacement, 2); +143 } +144 else if (has_operand_metadata(curr, "disp32")) { +145 emit_hex_bytes(new_inst, displacement, 4); +146 } +147 } +148 else { +149 new_inst.words.push_back(curr); +150 } +151 } +152 inst.words.swap(new_inst.words); +153 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); +154 } +155 } +156 +157 string data_to_string(const line& inst) { +158 ostringstream out; +159 for (int i = 0; i < SIZE(inst.words); ++i) { +160 if (i > 0) out << ' '; +161 out << inst.words.at(i).data; +162 } +163 return out.str(); +164 } 165 -166 :(scenario multiple_labels_at) -167 == 0x1 -168 # instruction effective address operand displacement immediate -169 # op subop mod rm32 base index scale r32 -170 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -171 # address 1 -172 loop: -173 $loop2: -174 # address 1 (labels take up no space) -175 05 0x0d0c0b0a/imm32 # add to EAX -176 # address 6 -177 eb $loop2/disp8 -178 # address 8 -179 eb $loop3/disp8 -180 # address 0xa -181 $loop3: -182 +transform: label 'loop' is at address 1 -183 +transform: label '$loop2' is at address 1 -184 +transform: label '$loop3' is at address a -185 # first jump is to -7 -186 +transform: instruction after transform: 'eb f9' -187 # second jump is to 0 (fall through) -188 +transform: instruction after transform: 'eb 00' -189 -190 :(scenario label_too_short) -191 % Hide_errors = true; -192 == 0x1 -193 # instruction effective address operand displacement immediate -194 # op subop mod rm32 base index scale r32 -195 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -196 xz: -197 05 0x0d0c0b0a/imm32 # add to EAX -198 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a 
different name +166 string drop_last(const string& s) { +167 return string(s.begin(), --s.end()); +168 } +169 +170 //: Label definitions must be the first word on a line. No jumping inside +171 //: instructions. +172 //: They should also be the only word on a line. +173 //: However, you can absolutely have multiple labels map to the same address, +174 //: as long as they're on separate lines. +175 +176 :(scenario multiple_labels_at) +177 == 0x1 +178 # instruction effective address operand displacement immediate +179 # op subop mod rm32 base index scale r32 +180 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +181 # address 1 +182 loop: +183 $loop2: +184 # address 1 (labels take up no space) +185 05 0x0d0c0b0a/imm32 # add to EAX +186 # address 6 +187 eb $loop2/disp8 +188 # address 8 +189 eb $loop3/disp8 +190 # address 0xa +191 $loop3: +192 +transform: label 'loop' is at address 1 +193 +transform: label '$loop2' is at address 1 +194 +transform: label '$loop3' is at address a +195 # first jump is to -7 +196 +transform: instruction after transform: 'eb f9' +197 # second jump is to 0 (fall through) +198 +transform: instruction after transform: 'eb 00' 199 -200 :(scenario label_hex) +200 :(scenario label_too_short) 201 % Hide_errors = true; 202 == 0x1 203 # instruction effective address operand displacement immediate 204 # op subop mod rm32 base index scale r32 205 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -206 0xab: +206 xz: 207 05 0x0d0c0b0a/imm32 # add to EAX -208 +error: '0xab' looks like a hex number; use a different name +208 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name 209 -210 :(scenario label_negative_hex) +210 :(scenario label_hex) 211 % Hide_errors = true; 212 == 0x1 213 # instruction effective address operand displacement immediate 214 # op subop mod rm32 base index scale r32 215 # 1-3 bytes 3 bits 2 bits 3 bits 3 
bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -216 -a: # indent to avoid looking like a trace_should_not_contain command for this scenario +216 0xab: 217 05 0x0d0c0b0a/imm32 # add to EAX -218 +error: '-a' starts with '-', which can be confused with a negative number; use a different name +218 +error: '0xab' looks like a hex number; use a different name 219 -220 //: now that we have labels, we need to adjust segment size computation to -221 //: ignore them. -222 -223 :(scenario segment_size_ignores_labels) -224 % Mem_offset = CODE_START; -225 == code # 0x08048074 -226 05/add 0x0d0c0b0a/imm32 # 5 bytes -227 foo: # 0 bytes -228 == data # 0x08049079 -229 bar: -230 00 -231 +transform: segment 1 begins at address 0x08049079 +220 :(scenario label_negative_hex) +221 % Hide_errors = true; +222 == 0x1 +223 # instruction effective address operand displacement immediate +224 # op subop mod rm32 base index scale r32 +225 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +226 -a: # indent to avoid looking like a trace_should_not_contain command for this scenario +227 05 0x0d0c0b0a/imm32 # add to EAX +228 +error: '-a' starts with '-', which can be confused with a negative number; use a different name +229 +230 //: now that we have labels, we need to adjust segment size computation to +231 //: ignore them. 
232 -233 :(before "End num_bytes(curr) Special-cases") -234 else if (is_label(curr)) -235 ; // don't count it +233 :(scenario segment_size_ignores_labels) +234 % Mem_offset = CODE_START; +235 == code # 0x08048074 +236 05/add 0x0d0c0b0a/imm32 # 5 bytes +237 foo: # 0 bytes +238 == data # 0x08049079 +239 bar: +240 00 +241 +transform: segment 1 begins at address 0x08049079 +242 +243 :(before "End num_bytes(curr) Special-cases") +244 else if (is_label(curr)) +245 ; // don't count it diff --git a/html/subx/036global_variables.cc.html b/html/subx/036global_variables.cc.html index c37ea272..a386013d 100644 --- a/html/subx/036global_variables.cc.html +++ b/html/subx/036global_variables.cc.html @@ -81,7 +81,7 @@ if ('onhashchange' in window) { 17 +transform: global variable 'x' is at address 0x08049079 18 19 :(before "End Level-2 Transforms") - 20 Transform.push_back(rewrite_global_variables); + 20 Transform.push_back(rewrite_global_variables); 21 :(code) 22 void rewrite_global_variables(program& p) { 23 trace(99, "transform") << "-- rewrite global variables" << end(); @@ -107,7 +107,7 @@ if ('onhashchange' in window) { 43 ++current_address; 44 } 45 else { - 46 string variable = drop_last(curr.data); + 46 string variable = drop_last(curr.data); 47 // ensure variables look sufficiently different from raw hex 48 check_valid_name(variable); 49 if (trace_contains_errors()) return; @@ -123,7 +123,7 @@ if ('onhashchange' in window) { 59 60 void drop_global_variables(program& p) { 61 for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) - 62 drop_labels(p.segments.at(i)); + 62 drop_labels(p.segments.at(i)); 63 } 64 65 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) { @@ -145,7 +145,7 @@ if ('onhashchange' in window) { 81 emit_hex_bytes(new_inst, get(address, curr.data), 4); 82 } 83 inst.words.swap(new_inst.words); - 84 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); + 84 
trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); 85 } 86 } 87 @@ -159,7 +159,7 @@ if ('onhashchange' in window) { 95 //: requires first saving some data early before we pack operands 96 97 :(after "Begin Level-2 Transforms") - 98 Transform.push_back(correlate_disp32_with_mod); + 98 Transform.push_back(correlate_disp32_with_mod); 99 :(code) 100 void correlate_disp32_with_mod(program& p) { 101 if (p.segments.empty()) return; diff --git a/html/subx/038---literal_strings.cc.html b/html/subx/038---literal_strings.cc.html new file mode 100644 index 00000000..70494c30 --- /dev/null +++ b/html/subx/038---literal_strings.cc.html @@ -0,0 +1,281 @@ + + + + +Mu - subx/038---literal_strings.cc + + + + + + + + + + +
+  1 //: Allow instructions to mention literals directly.
+  2 //:
+  3 //: This layer will transparently move them to the data segment (assumed to
+  4 //: always be the second segment).
+  5 
+  6 :(scenario transform_literal_string)
+  7 % Mem_offset = CODE_START;
+  8 % Mem.resize(AFTER_STACK - CODE_START);
+  9 == code
+ 10   b8/copy "test"/imm32  # copy to EAX
+ 11 +transform: -- move literal strings to data segment
+ 12 +transform: adding global variable '__subx_global_1' containing "test"
+ 13 +transform: instruction after transform: 'b8 __subx_global_1'
+ 14 
+ 15 //: We don't rely on any transforms running in previous layers, but this layer
+ 16 //: knows about labels and global variables and will emit them for previous
+ 17 //: layers to transform.
+ 18 :(after "Begin Transforms")
+ 19 // Begin Level-3 Transforms
+ 20 Transform.push_back(transform_literal_strings);
+ 21 // End Level-3 Transforms
+ 22 
+ 23 :(before "End Globals")
+ 24 int Next_auto_global = 1;
+ 25 :(code)
+ 26 void transform_literal_strings(program& p) {
+ 27   trace(99, "transform") << "-- move literal strings to data segment" << end();
+ 28   if (p.segments.empty()) return;
+ 29   segment& code = p.segments.at(0);
+ 30   segment data;
+ 31   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+ 32     line& inst = code.lines.at(i);
+ 33     for (int j = 0;  j < SIZE(inst.words);  ++j) {
+ 34       word& curr = inst.words.at(j);
+ 35       if (curr.data.at(0) != '"') continue;
+ 36       ostringstream global_name;
+ 37       global_name << "__subx_global_" << Next_auto_global;
+ 38       ++Next_auto_global;
+ 39       add_global_to_data_segment(global_name.str(), curr, data);
+ 40       curr.data = global_name.str();
+ 41     }
+ 42     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
+ 43   }
+ 44   if (data.lines.empty()) return;
+ 45   if (SIZE(p.segments) < 2) {
+ 46     p.segments.resize(2);
+ 47     p.segments.at(1).lines.swap(data.lines);
+ 48   }
+ 49   vector<line>& existing_data = p.segments.at(1).lines;
+ 50   existing_data.insert(existing_data.end(), data.lines.begin(), data.lines.end());
+ 51 }
+ 52 
+ 53 void add_global_to_data_segment(const string& name, const word& value, segment& data) {
+ 54   trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
+ 55   // emit label
+ 56   data.lines.push_back(label(name));
+ 57   // emit size for size-prefixed array
+ 58   data.lines.push_back(line());
+ 59   emit_hex_bytes(data.lines.back(), SIZE(value.data)-/*skip quotes*/2, 4/*bytes*/);
+ 60   // emit data byte by byte
+ 61   data.lines.push_back(line());
+ 62   line& curr = data.lines.back();
+ 63   for (int i = /*skip start quote*/1;  i < SIZE(value.data)-/*skip end quote*/1;  ++i) {
+ 64     char c = value.data.at(i);
+ 65     curr.words.push_back(word());
+ 66     curr.words.back().data = hex_byte_to_string(c);
+ 67     curr.words.back().metadata.push_back(string(1, c));
+ 68   }
+ 69 }
+ 70 
+ 71 line label(string s) {
+ 72   line result;
+ 73   result.words.push_back(word());
+ 74   result.words.back().data = (s+":");
+ 75   return result;
+ 76 }
+ 77 
+ 78 //: Within strings, whitespace is significant. So we need to redo our instruction
+ 79 //: parsing.
+ 80 
+ 81 :(scenarios parse_instruction_character_by_character)
+ 82 :(scenario instruction_with_string_literal)
+ 83 a "abc  def" z  # two spaces inside string
+ 84 +parse2: word: a
+ 85 +parse2: word: "abc  def"
+ 86 +parse2: word: z
+ 87 # no other words
+ 88 $parse2: 3
+ 89 
+ 90 :(before "End Line Parsing Special-cases(line_data -> l)")
+ 91 if (line_data.find('"') != string::npos) {  // can cause false-positives, but we can handle them
+ 92   parse_instruction_character_by_character(line_data, l);
+ 93   continue;
+ 94 }
+ 95 
+ 96 :(code)
+ 97 void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
+ 98   // parse literals
+ 99   istringstream in(line_data);
+100   in >> std::noskipws;
+101   line result;
+102   // add tokens (words or strings) one by one
+103   while (has_data(in)) {
+104     skip_whitespace(in);
+105     if (!has_data(in)) break;
+106     char c = in.get();
+107     if (c == '#') break;  // comment; drop rest of line
+108     if (c == ':') break;  // line metadata; skip for now
+109     if (c == '.') {
+110       if (!has_data(in)) break;  // comment token at end of line
+111       if (isspace(in.peek()))
+112         continue;  // '.' followed by space is comment token; skip
+113     }
+114     ostringstream w;
+115     w << c;
+116     if (c == '"') {
+117       // slurp until '"'
+118       while (has_data(in)) {
+119         in >> c;
+120         w << c;
+121         if (c == '"') break;
+122       }
+123     }
+124     // slurp any remaining characters until whitespace
+125     while (!isspace(in.peek()) && has_data(in)) {  // peek can sometimes trigger eof(), so do it first
+126       in >> c;
+127       w << c;
+128     }
+129     result.words.push_back(word());
+130     parse_word(w.str(), result.words.back());
+131     trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
+132   }
+133   if (!result.words.empty())
+134     out.push_back(result);
+135 }
+136 
+137 void skip_whitespace(istream& in) {
+138   while (true) {
+139     if (has_data(in) && isspace(in.peek())) in.get();
+140     else break;
+141   }
+142 }
+143 
+144 void skip_comment(istream& in) {
+145   if (has_data(in) && in.peek() == '#') {
+146     in.get();
+147     while (has_data(in) && in.peek() != '\n') in.get();
+148   }
+149 }
+150 
+151 // helper for tests
+152 void parse_instruction_character_by_character(const string& line_data) {
+153   vector<line> out;
+154   parse_instruction_character_by_character(line_data, out);
+155 }
+156 
+157 :(scenario parse2_comment_token_in_middle)
+158 a . z
+159 +parse2: word: a
+160 +parse2: word: z
+161 -parse2: word: .
+162 # no other words
+163 $parse2: 2
+164 
+165 :(scenario parse2_word_starting_with_dot)
+166 a .b c
+167 +parse2: word: a
+168 +parse2: word: .b
+169 +parse2: word: c
+170 
+171 :(scenario parse2_comment_token_at_start)
+172 . a b
+173 +parse2: word: a
+174 +parse2: word: b
+175 -parse2: word: .
+176 
+177 :(scenario parse2_comment_token_at_end)
+178 a b .
+179 +parse2: word: a
+180 +parse2: word: b
+181 -parse2: word: .
+182 
+183 :(scenario parse2_word_starting_with_dot_at_start)
+184 .a b c
+185 +parse2: word: .a
+186 +parse2: word: b
+187 +parse2: word: c
+188 
+189 :(scenario parse2_metadata)
+190 .a b/c d
+191 +parse2: word: .a
+192 +parse2: word: b /c
+193 +parse2: word: d
+194 
+195 :(scenario parse2_string_with_metadata)
+196 a "bc  def"/disp32 g
+197 +parse2: word: a
+198 +parse2: word: "bc  def" /disp32
+199 +parse2: word: g
+200 
+201 :(scenario parse2_string_with_metadata_at_end)
+202 a "bc  def"/disp32
+203 +parse2: word: a
+204 +parse2: word: "bc  def" /disp32
+205 
+206 :(code)
+207 void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
+208   parse_instruction_character_by_character(
+209       "68/push \"test\"/f"  // no newline, which is how calls from parse() will look
+210   );
+211   CHECK_TRACE_CONTENTS(
+212       "parse2: word: 68 /push^D"
+213       "parse2: word: \"test\" /f^D"
+214   );
+215 }
+
+ + + diff --git a/html/subx/040---tests.cc.html b/html/subx/040---tests.cc.html index 7a676bb1..8d17c343 100644 --- a/html/subx/040---tests.cc.html +++ b/html/subx/040---tests.cc.html @@ -60,110 +60,92 @@ if ('onhashchange' in window) {
-  1 //: Beginning of level 3: support for automatically aggregating functions into
-  2 //: test suites.
-  3 //:
-  4 //: (As explained in the transform layer, level 3 runs before level 2. We
-  5 //: can't use any of the transforms in previous layers. But we *do* rely on
-  6 //: those concepts being present in the input. Particularly labels.)
-  7 
-  8 :(after "Begin Transforms")
-  9 // Begin Level-3 Transforms
- 10 Transform.push_back(create_test_function);
- 11 // End Level-3 Transforms
- 12 
- 13 :(scenario run_test)
- 14 % Reg[ESP].u = 0x100;
- 15 == 0x1
- 16 main:
- 17   e8/call run_tests/disp32  # 5 bytes
- 18   f4/halt                   # 1 byte
- 19 
- 20 test_foo:  # offset 7
- 21   01 d8  # just some unique instruction: add EBX to EAX
- 22   c3/return
- 23 
- 24 # check that code in test_foo ran (implicitly called by run_tests)
- 25 +run: inst: 0x00000007
- 26 
- 27 :(code)
- 28 void create_test_function(program& p) {
- 29   if (p.segments.empty()) return;
- 30   segment& code = p.segments.at(0);
- 31   trace(99, "transform") << "-- create 'run_tests'" << end();
- 32   vector<line> new_insts;
- 33   for (int i = 0;  i < SIZE(code.lines);  ++i) {
- 34     line& inst = code.lines.at(i);
- 35     for (int j = 0;  j < SIZE(inst.words);  ++j) {
- 36       const word& curr = inst.words.at(j);
- 37       if (*curr.data.rbegin() != ':') continue;  // not a label
- 38       if (!starts_with(curr.data, "test_")) continue;
- 39       string fn = drop_last(curr.data);
- 40       new_insts.push_back(call(fn));
- 41     }
- 42   }
- 43   if (new_insts.empty()) return;  // no tests found
- 44   code.lines.push_back(label("run_tests"));
- 45   code.lines.insert(code.lines.end(), new_insts.begin(), new_insts.end());
- 46   code.lines.push_back(ret());
- 47 }
- 48 
- 49 string to_string(const segment& s) {
- 50   ostringstream out;
- 51   for (int i = 0;  i < SIZE(s.lines);  ++i) {
- 52     const line& l = s.lines.at(i);
- 53     for (int j = 0;  j < SIZE(l.words);  ++j) {
- 54       if (j > 0) out << ' ';
- 55       out << to_string(l.words.at(j));
- 56     }
- 57     out << '\n';
- 58   }
- 59   return out.str();
- 60 }
- 61 
- 62 string to_string(const word& w) {
- 63   ostringstream out;
- 64   out << w.data;
- 65   for (int i = 0;  i < SIZE(w.metadata);  ++i)
- 66     out << '/' << w.metadata.at(i);
- 67   return out.str();
- 68 }
- 69 
- 70 line label(string s) {
- 71   line result;
- 72   result.words.push_back(word());
- 73   result.words.back().data = (s+":");
- 74   return result;
- 75 }
- 76 
- 77 line call(string s) {
- 78   line result;
- 79   result.words.push_back(call());
- 80   result.words.push_back(disp32(s));
- 81   return result;
- 82 }
- 83 
- 84 word call() {
- 85   word result;
- 86   result.data = "e8";
- 87   result.metadata.push_back("call");
- 88   return result;
- 89 }
- 90 
- 91 word disp32(string s) {
- 92   word result;
- 93   result.data = s;
- 94   result.metadata.push_back("disp32");
- 95   return result;
- 96 }
- 97 
- 98 line ret() {
- 99   line result;
-100   result.words.push_back(word());
-101   result.words.back().data = "c3";
-102   result.words.back().metadata.push_back("return");
-103   return result;
-104 }
+ 1 //: Automatically aggregating functions into test suites.
+ 2 
+ 3 //: We don't rely on any transforms running in previous layers, but this layer
+ 4 //: knows about labels and will emit labels for previous layers to transform.
+ 5 :(after "Begin Transforms")
+ 6 // Begin Level-4 Transforms
+ 7 Transform.push_back(create_test_function);
+ 8 // End Level-4 Transforms
+ 9 
+10 :(scenario run_test)
+11 % Reg[ESP].u = 0x100;
+12 == 0x1
+13 main:
+14   e8/call run_tests/disp32  # 5 bytes
+15   f4/halt                   # 1 byte
+16 
+17 test_foo:  # offset 7
+18   01 d8  # just some unique instruction: add EBX to EAX
+19   c3/return
+20 
+21 # check that code in test_foo ran (implicitly called by run_tests)
+22 +run: inst: 0x00000007
+23 
+24 :(code)
+25 void create_test_function(program& p) {
+26   if (p.segments.empty()) return;
+27   segment& code = p.segments.at(0);
+28   trace(99, "transform") << "-- create 'run_tests'" << end();
+29   vector<line> new_insts;
+30   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+31     line& inst = code.lines.at(i);
+32     for (int j = 0;  j < SIZE(inst.words);  ++j) {
+33       const word& curr = inst.words.at(j);
+34       if (*curr.data.rbegin() != ':') continue;  // not a label
+35       if (!starts_with(curr.data, "test_")) continue;
+36       string fn = drop_last(curr.data);
+37       new_insts.push_back(call(fn));
+38     }
+39   }
+40   if (new_insts.empty()) return;  // no tests found
+41   code.lines.push_back(label("run_tests"));
+42   code.lines.insert(code.lines.end(), new_insts.begin(), new_insts.end());
+43   code.lines.push_back(ret());
+44 }
+45 
+46 string to_string(const segment& s) {
+47   ostringstream out;
+48   for (int i = 0;  i < SIZE(s.lines);  ++i) {
+49     const line& l = s.lines.at(i);
+50     for (int j = 0;  j < SIZE(l.words);  ++j) {
+51       if (j > 0) out << ' ';
+52       out << to_string(l.words.at(j));
+53     }
+54     out << '\n';
+55   }
+56   return out.str();
+57 }
+58 
+59 line call(string s) {  // build a two-word instruction line: the call opcode followed by s tagged as a disp32
+60   line result;
+61   result.words.push_back(call());  // opcode word (see the zero-argument overload)
+62   result.words.push_back(disp32(s));  // operand word naming the call target
+63   return result;
+64 }
+65 
+66 word call() {  // build the opcode word 'e8' carrying the metadata 'call'
+67   word result;
+68   result.data = "e8";
+69   result.metadata.push_back("call");
+70   return result;
+71 }
+72 
+73 word disp32(string s) {  // build a word whose data is s and whose metadata marks it as a disp32 operand
+74   word result;
+75   result.data = s;
+76   result.metadata.push_back("disp32");
+77   return result;
+78 }
+79 
+80 line ret() {  // build a one-word instruction line: 'c3' carrying the metadata 'return'
+81   line result;
+82   result.words.push_back(word());  // start from an empty word, then fill it in place
+83   result.words.back().data = "c3";
+84   result.words.back().metadata.push_back("return");
+85   return result;
+86 }
 
diff --git a/html/subx/apps/crenshaw2-1.subx.html b/html/subx/apps/crenshaw2-1.subx.html index 73e18f96..c01985bb 100644 --- a/html/subx/apps/crenshaw2-1.subx.html +++ b/html/subx/apps/crenshaw2-1.subx.html @@ -6,17 +6,18 @@ - + + + + + +
+  1 ## Null-terminated vs length-prefixed ascii strings.
+  2 #
+  3 # By default we create strings with a 4-byte length prefix rather than a null suffix.
+  4 # However, commandline arguments come null-terminated from the Linux kernel.
+  5 # This example shows a helper that can compare a commandline argument with the
+  6 # (length-prefixed) literal string "target".
+  7 #
+  8 # To run:
+  9 #   $ subx translate ex11.subx ex11
+ 10 #   $ subx run ex11  # runs a series of tests
+ 11 #   ......  # all tests pass
+ 12 #
+ 13 # (We can't yet run the tests when given a "test" commandline argument,
+ 14 # because checking for it would require the function being tested! Breakage
+ 15 # would cause tests to not run, rather than to fail as we'd like.)
+ 16 
+ 17 == code
+ 18 # instruction                     effective address                                                   operand     displacement    immediate
+ 19 # op          subop               mod             rm32          base        index         scale       r32
+ 20 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+ 21 
+ 22 # main: run all tests, then exit with whatever is left in EAX as the status
+ 23   e8/call  run_tests/disp32  # 'run_tests' is a function created automatically by SubX. It calls all functions that start with 'test_'.
+ 24   # exit(EAX): the exit status travels in EBX, the syscall number in EAX
+ 25   89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                 # copy EAX to EBX
+ 26   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 (the exit syscall number) to EAX
+ 27   cd/syscall  0x80/imm8
+ 28 
+ 29 # compare a null-terminated ascii string with a more idiomatic length-prefixed byte array
+ 30 # reason for the name: the only place we should have null-terminated ascii strings is from commandline args
+ 31 argv_equal:  # s : null-terminated ascii string, benchmark : length-prefixed ascii string -> EAX : boolean (clobbers EBX, ECX, EDX, ESI, EDI; none are saved)
+ 32   # pseudocode:
+ 33   #   initialize n = b.length
+ 34   #   initialize s1 = s
+ 35   #   initialize s2 = b.data
+ 36   #   i = 0
+ 37   #   for (i = 0; i < n; ++i)
+ 38   #     c1 = *s1
+ 39   #     c2 = *s2
+ 40   #     if c1 == 0
+ 41   #       return false
+ 42   #     if c1 != c2
+ 43   #       return false
+ 44   #   return *s1 == 0
+ 45 
+ 46   # initialize s into EDI (callers push s first, so it sits above benchmark on the stack)
+ 47   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           7/r32/EDI   8/disp8         .                 # copy *(ESP+8) to EDI
+ 48   # initialize benchmark length n into EDX
+ 49   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           2/r32/EDX   4/disp8         .                 # copy *(ESP+4) to EDX
+ 50   8b/copy                         0/mod/indirect  2/rm32/EDX    .           .             .           2/r32/EDX   .               .                 # copy *EDX to EDX
+ 51   # initialize benchmark data into ESI (skip the 4-byte length prefix)
+ 52   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           6/r32/ESI   4/disp8         .                 # copy *(ESP+4) to ESI
+ 53   81          0/subop/add         3/mod/direct    6/rm32/ESI    .           .             .           .           .               4/imm32           # add 4 to ESI
+ 54   # initialize loop counter i into ECX
+ 55   b9/copy                         .               .             .           .             .           .           .               0/imm32           # copy 0 to ECX
+ 56   # while (i/ECX < n/EDX)
+ 57 $argv_loop:
+ 58   39/compare                      3/mod/direct    1/rm32/ECX    .           .             .           2/r32/EDX   .               .                 # compare ECX with EDX
+ 59   74/jump-if-equal  $argv_break/disp8
+ 60     # c1/EAX, c2/EBX = *s, *benchmark
+ 61   b8/copy  0/imm32  # clear EAX
+ 62   8a/copy                         0/mod/indirect  7/rm32/EDI    .           .             .           0/r32/EAX   .               .                 # copy byte at *EDI to lower byte of EAX
+ 63   bb/copy  0/imm32  # clear EBX
+ 64   8a/copy                         0/mod/indirect  6/rm32/ESI    .           .             .           3/r32/EBX   .               .                 # copy byte at *ESI to lower byte of EBX
+ 65     # if (c1 == 0) return false
+ 66   3d/compare                      .               .             .           .             .           .           .               0/imm32           # compare EAX with 0
+ 67   74/jump-if-equal  $argv_fail/disp8
+ 68     # if (c1 != c2) return false
+ 69   39/compare                      3/mod/direct    0/rm32/EAX    .           .             .           3/r32/EBX   .               .                 # compare EAX with EBX
+ 70   75/jump-if-not-equal  $argv_fail/disp8
+ 71     # ++s1, ++s2, ++i
+ 72   41/inc-ECX
+ 73   46/inc-ESI
+ 74   47/inc-EDI
+ 75   # end while
+ 76   eb/jump  $argv_loop/disp8
+ 77 $argv_break:
+ 78   # if (*s/EDI == 0) return true (s must end exactly where the benchmark does)
+ 79   b8/copy  0/imm32  # clear EAX
+ 80   8a/copy                         0/mod/indirect  7/rm32/EDI    .           .             .           0/r32/EAX   .               .                 # copy byte at *EDI to lower byte of EAX
+ 81   81          7/subop/compare     3/mod/direct    0/rm32/EAX    .           .             .           .           .               0/imm32           # compare EAX with 0
+ 82   75/jump-if-not-equal  $argv_fail/disp8
+ 83   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EAX
+ 84   c3/return
+ 85   # return false
+ 86 $argv_fail:
+ 87   b8/copy                         .               .             .           .             .           .           .               0/imm32           # copy 0 to EAX
+ 88   c3/return
+ 89 
+ 90 ## tests
+ 91 
+ 92 test_compare_null_argv_with_empty_array:
+ 93   # EAX = argv_equal(Null_argv, "")  -- expect true: both strings are empty
+ 94     # push args (first arg first)
+ 95   68/push  Null_argv/imm32
+ 96   68/push  ""/imm32
+ 97     # call
+ 98   e8/call  argv_equal/disp32
+ 99     # discard args
+100   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add 8 to ESP
+101   # call check_ints_equal(EAX, 1, msg)
+102   50/push-EAX
+103   68/push  1/imm32/true
+104   68/push  "F - test_compare_null_argv_with_empty_array"/imm32
+105     # call
+106   e8/call  check_ints_equal/disp32
+107     # discard args
+108   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add 12 to ESP
+109   c3/return
+110 
+111 test_compare_null_argv_with_non_empty_array:
+112   # EAX = argv_equal(Null_argv, "Abc")  -- expect false: s ends before the array does
+113     # push args (first arg first)
+114   68/push  Null_argv/imm32
+115   68/push  "Abc"/imm32
+116     # call
+117   e8/call  argv_equal/disp32
+118     # discard args
+119   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add 8 to ESP
+120   # call check_ints_equal(EAX, 0, msg)
+121   50/push-EAX
+122   68/push  0/imm32/false
+123   68/push  "F - test_compare_null_argv_with_non_empty_array"/imm32
+124     # call
+125   e8/call  check_ints_equal/disp32
+126     # discard args
+127   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add 12 to ESP
+128   c3/return
+129 
+130 test_compare_argv_with_equal_array:
+131   # EAX = argv_equal(Abc_argv, "Abc")  -- expect true: same bytes, same length
+132     # push args (first arg first)
+133   68/push  Abc_argv/imm32
+134   68/push  "Abc"/imm32
+135     # call
+136   e8/call  argv_equal/disp32
+137     # discard args
+138   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add 8 to ESP
+139   # call check_ints_equal(EAX, 1, msg)
+140   50/push-EAX
+141   68/push  1/imm32/true
+142   68/push  "F - test_compare_argv_with_equal_array"/imm32
+143     # call
+144   e8/call  check_ints_equal/disp32
+145     # discard args
+146   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add 12 to ESP
+147   c3/return
+148 
+149 test_compare_argv_with_inequal_array:
+150   # EAX = argv_equal(Abc_argv, "Adc")  -- expect false: the second byte differs
+151     # push args (first arg first)
+152   68/push  Abc_argv/imm32
+153   68/push  "Adc"/imm32
+154     # call
+155   e8/call  argv_equal/disp32
+156     # discard args
+157   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add 8 to ESP
+158   # call check_ints_equal(EAX, 0, msg); msg must name this test so a failure is attributed correctly
+159   50/push-EAX
+160   68/push  0/imm32/false
+161   68/push  "F - test_compare_argv_with_inequal_array"/imm32
+162     # call
+163   e8/call  check_ints_equal/disp32
+164     # discard args
+165   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add 12 to ESP
+166   c3/return
+167 
+168 test_compare_argv_with_empty_array:
+169   # EAX = argv_equal(Abc_argv, "")  -- expect false: s still has bytes once the empty array is exhausted
+170     # push args (first arg first)
+171   68/push  Abc_argv/imm32
+172   68/push  ""/imm32
+173     # call
+174   e8/call  argv_equal/disp32
+175     # discard args
+176   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add 8 to ESP
+177   # call check_ints_equal(EAX, 0, msg); msg must name this test so a failure is attributed correctly
+178   50/push-EAX
+179   68/push  0/imm32/false
+180   68/push  "F - test_compare_argv_with_empty_array"/imm32
+181     # call
+182   e8/call  check_ints_equal/disp32
+183     # discard args
+184   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add 12 to ESP
+185   c3/return
+186 
+187 test_compare_argv_with_shorter_array:
+188   # EAX = argv_equal(Abc_argv, "Ab")  -- expect false: s has a byte left over after the array ends
+189     # push args (first arg first)
+190   68/push  Abc_argv/imm32
+191   68/push  "Ab"/imm32
+192     # call
+193   e8/call  argv_equal/disp32
+194     # discard args
+195   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add 8 to ESP
+196   # call check_ints_equal(EAX, 0, msg)
+197   50/push-EAX
+198   68/push  0/imm32/false
+199   68/push  "F - test_compare_argv_with_shorter_array"/imm32
+200     # call
+201   e8/call  check_ints_equal/disp32
+202     # discard args
+203   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add 12 to ESP
+204   c3/return
+205 
+206 test_compare_argv_with_longer_array:
+207   # EAX = argv_equal(Abc_argv, "Abcd")  -- expect false: s hits its null before the array ends
+208     # push args (first arg first)
+209   68/push  Abc_argv/imm32
+210   68/push  "Abcd"/imm32
+211     # call
+212   e8/call  argv_equal/disp32
+213     # discard args
+214   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add 8 to ESP
+215   # call check_ints_equal(EAX, 0, msg)
+216   50/push-EAX
+217   68/push  0/imm32/false
+218   68/push  "F - test_compare_argv_with_longer_array"/imm32
+219     # call
+220   e8/call  check_ints_equal/disp32
+221     # discard args
+222   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add 12 to ESP
+223   c3/return
+224 
+225 ## helpers
+226 
+227 # print msg to stderr if a != b, otherwise print "."
+228 check_ints_equal:  # (a : int, b : int, msg : (address array byte)) -> boolean  (NOTE(review): no instruction here stores a boolean in EAX -- confirm callers ignore the result)
+229   # load a into EAX and b into EBX (msg is loaded into ECX further down, only on mismatch)
+230   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           0/r32/EAX   0xc/disp8       .                 # copy *(ESP+12) to EAX
+231   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           3/r32/EBX   0x8/disp8       .                 # copy *(ESP+8) to EBX
+232   # if a/EAX == b/EBX
+233   39/compare                      3/mod/direct    0/rm32/EAX    .           .             .           3/r32/EBX   .               .                 # compare EAX and EBX
+234   75/jump-if-unequal  $check_ints_equal:else/disp8
+235     # print('.')
+236       # push args
+237   68/push  "."/imm32
+238       # call
+239   e8/call  write_stderr/disp32
+240       # discard arg
+241   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add 4 to ESP
+242     # return
+243   c3/return
+244   # else: a != b, so report the failure message followed by a newline
+245 $check_ints_equal:else:
+246   # copy msg into ECX
+247   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           1/r32/ECX   4/disp8         .                 # copy *(ESP+4) to ECX
+248     # print(ECX)
+249       # push args
+250   51/push-ECX
+251       # call
+252   e8/call  write_stderr/disp32
+253       # discard arg
+254   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add 4 to ESP
+255     # print newline
+256       # push args
+257   68/push  Newline/imm32
+258       # call
+259   e8/call  write_stderr/disp32
+260       # discard arg
+261   81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add 4 to ESP
+262   # end
+263   c3/return
+264 
+265 write_stderr:  # s : (address array byte) -> <void>
+266   # save registers (all four are overwritten below to set up the syscall)
+267   50/push-EAX
+268   51/push-ECX
+269   52/push-EDX
+270   53/push-EBX
+271   # write(2/stderr, (data) s+4, (size) *s)
+272     # fd = 2 (stderr)
+273   bb/copy                         .               .             .           .             .           .           .               2/imm32           # copy 2 to EBX
+274     # x = s+4 (s now sits at ESP+20: 16 bytes of saved registers plus the return address)
+275   8b/copy                         1/mod/*+disp8   4/rm32/SIB    4/base/ESP  4/index/none  .           1/r32/ECX   0x14/disp8      .                 # copy *(ESP+20) to ECX
+276   81          0/subop/add         3/mod/direct    1/rm32/ECX    .           .             .           .           .               4/imm32           # add 4 to ECX
+277     # size = *s
+278   8b/copy                         1/mod/*+disp8   4/rm32/SIB    4/base/ESP  4/index/none  .           2/r32/EDX   0x14/disp8      .                 # copy *(ESP+20) to EDX
+279   8b/copy                         0/mod/indirect  2/rm32/EDX    .           .             .           2/r32/EDX   .               .                 # copy *EDX to EDX
+280     # call write()
+281   b8/copy                         .               .             .           .             .           .           .               4/imm32/write     # copy 4 to EAX
+282   cd/syscall  0x80/imm8
+283   # restore registers (reverse order of the pushes above)
+284   5b/pop-EBX
+285   5a/pop-EDX
+286   59/pop-ECX
+287   58/pop-EAX
+288   # end
+289   c3/return
+290 
+291 == data
+292 Newline:
+293   # size (4-byte length prefix, least significant byte first: 1)
+294   01 00 00 00
+295   # data
+296   0a/newline
+297 
+298 # null-terminated strings (no length prefix) for argv_equal tests
+299 Null_argv:
+300   00/null
+301 Abc_argv:
+302   41/A 62/b 63/c 00/null
+303 
+304 # vim:ft=subx:nowrap:so=0
+
+ + + diff --git a/html/subx/examples/ex5.subx.html b/html/subx/examples/ex5.subx.html index aa4f9761..7bd76e98 100644 --- a/html/subx/examples/ex5.subx.html +++ b/html/subx/examples/ex5.subx.html @@ -16,6 +16,7 @@ a { color:#eeeeee; text-decoration: none; } a:hover { text-decoration: underline; } * { font-size: 12pt; font-size: 1em; } .LineNr { color: #444444; } +.Delimiter { color: #800080; } .Comment { color: #9090ff; } .Comment a { color:#0000ee; text-decoration:underline; } .SalientComment { color: #00ffff; } @@ -65,39 +66,36 @@ if ('onhashchange' in window) { 10 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes 11 12 # main: -13 # prolog -14 55/push # push EBP -15 89/copy 3/mod/direct 5/rm32/EBP 4/r32/ESP # copy ESP to EBP -16 # allocate x on the stack -17 81 5/subop/subtract 3/mod/direct 4/rm32/ESP 4/imm32 # subtract 4 bytes from ESP -18 -19 # read(stdin, x, 1) -20 # fd = 0 (stdin) -21 bb/copy 0/imm32 # copy 0 to EBX -22 # initialize x (location to write result to) -23 89/copy 3/mod/direct 1/rm32/ECX 5/r32/EBP # copy EBP to ECX -24 # size = 1 character -25 ba/copy 1/imm32 # copy 1 to EDX -26 # read(fd, x, size) -27 b8/copy 3/imm32/read # copy 3 to EAX -28 cd/syscall 0x80/imm8 -29 -30 # write(stdout, x, 1) -31 # fd = 1 (stdout) -32 bb/copy 1/imm32 # copy 1 to EBX -33 # initialize x (location to read from) -34 89/copy 3/mod/direct 1/rm32/ECX 5/r32/EBP # copy EBP to ECX -35 # size = 1 character -36 ba/copy 1/imm32 # copy 1 to EDX -37 # write(fd, x, size) -38 b8/copy 4/imm32/write # copy 4 to EAX -39 cd/syscall 0x80/imm8 -40 -41 # exit(EBX) -42 b8/copy 1/imm32/exit # copy 1 to EAX -43 cd/syscall 0x80/imm8 -44 -45 # vim:ft=subx:nowrap +13 # allocate x on the stack +14 81 5/subop/subtract 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # subtract 4 bytes from ESP +15 +16 # read(stdin, x, 1) +17 # fd = 0 (stdin) +18 bb/copy . . . . . . . 
0/imm32 # copy 0 to EBX +19 # initialize x (location to write result to) +20 8d/copy-address 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 1/r32/ECX 4/disp8 . # copy ESP+4 to ECX +21 # size = 1 character +22 ba/copy . . . . . . . 1/imm32 # copy 1 to EDX +23 # read(fd, x, size) +24 b8/copy . . . . . . . 3/imm32/read # copy 3 to EAX +25 cd/syscall 0x80/imm8 +26 +27 # write(stdout, x, 1) +28 # fd = 1 (stdout) +29 bb/copy . . . . . . . 1/imm32 # copy 1 to EBX +30 # initialize x (location to read from) +31 8d/copy-address 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 1/r32/ECX 4/disp8 . # copy ESP+4 to ECX +32 # size = 1 character +33 ba/copy . . . . . . . 1/imm32 # copy 1 to EDX +34 # write(fd, x, size) +35 b8/copy . . . . . . . 4/imm32/write # copy 4 to EAX +36 cd/syscall 0x80/imm8 +37 +38 # exit(EBX) +39 b8/copy . . . . . . . 1/imm32/exit # copy 1 to EAX +40 cd/syscall 0x80/imm8 +41 +42 # vim:ft=subx:nowrap diff --git a/html/subx/examples/ex8.subx.html b/html/subx/examples/ex8.subx.html index e091fe9b..65f5e09c 100644 --- a/html/subx/examples/ex8.subx.html +++ b/html/subx/examples/ex8.subx.html @@ -77,44 +77,38 @@ if ('onhashchange' in window) { 21 # var s = argv[1] (EBX) 22 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 3/r32/EBX 8/disp8 . # copy *(ESP+8) to EBX 23 # call ascii_length(EBX) -24 # prepare call -25 55/push . . . . . . . . # push EBP -26 89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP -27 # push args -28 53/push . . . . . . . . # push EBX -29 # call -30 e8/call ascii_length/disp32 -31 # discard args -32 5a/pop . . . . . . . . # pop into EDX -33 # clean up after call -34 89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP -35 5d/pop . . . . . . . . # pop to EBP -36 -37 # exit(EAX) -38 89/copy 3/mod/direct 3/rm32/EBX . . . 0/r32/EAX . . # copy EAX to EBX -39 b8/copy . . . . . . . 
1/imm32/exit # copy 1 to EAX -40 cd/syscall 0x80/imm8 -41 -42 ascii_length: # (s) -43 # initialize s (EDX) -44 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 2/r32/EDX 4/disp8 # copy *(ESP+4) to EDX -45 # var result = 0 (EAX) -46 b8/copy . . . . . . . 0/imm32 # copy 0 to EAX -47 $ascii_length_loop: -48 # var c = *s (ECX) -49 8a/copy 0/mod/* 2/rm32/EDX . . . 1/r32/ECX . . # copy byte at *EDX to lower byte of ECX -50 # if c == '\0' break -51 81 7/subop/compare 3/mod/direct 1/rm32/ECX . . . . . 0/imm32 # compare ECX with 0 -52 74/jump-if-equal $ascii_length_ret/disp8 -53 # ++s -54 81 0/subop/add 3/mod/direct 2/rm32/EDX . . . . . 1/imm32 # add 1 to EDX -55 # ++result -56 81 0/subop/add 3/mod/direct 0/rm32/EAX . . . . . 1/imm32 # add 1 to EAX -57 # loop -58 eb/jump $ascii_length_loop/disp8 -59 $ascii_length_ret: -60 # return (result in EAX) -61 c3/return +24 # push args +25 53/push-EBX +26 # call +27 e8/call ascii_length/disp32 +28 # discard args +29 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add 4 to ESP +30 +31 # exit(EAX) +32 89/copy 3/mod/direct 3/rm32/EBX . . . 0/r32/EAX . . # copy EAX to EBX +33 b8/copy . . . . . . . 1/imm32/exit # copy 1 to EAX +34 cd/syscall 0x80/imm8 +35 +36 ascii_length: # (s) +37 # initialize s (EDX) +38 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 2/r32/EDX 4/disp8 # copy *(ESP+4) to EDX +39 # var result = 0 (EAX) +40 b8/copy . . . . . . . 0/imm32 # copy 0 to EAX +41 $ascii_length_loop: +42 # var c = *s (ECX) +43 8a/copy 0/mod/* 2/rm32/EDX . . . 1/r32/ECX . . # copy byte at *EDX to lower byte of ECX +44 # if c == '\0' break +45 81 7/subop/compare 3/mod/direct 1/rm32/ECX . . . . . 0/imm32 # compare ECX with 0 +46 74/jump-if-equal $ascii_length_ret/disp8 +47 # ++s +48 81 0/subop/add 3/mod/direct 2/rm32/EDX . . . . . 1/imm32 # add 1 to EDX +49 # ++result +50 81 0/subop/add 3/mod/direct 0/rm32/EAX . . . . . 
1/imm32 # add 1 to EAX +51 # loop +52 eb/jump $ascii_length_loop/disp8 +53 $ascii_length_ret: +54 # return (result in EAX) +55 c3/return diff --git a/html/subx/examples/ex9.subx.html b/html/subx/examples/ex9.subx.html index 6665f520..78d8323e 100644 --- a/html/subx/examples/ex9.subx.html +++ b/html/subx/examples/ex9.subx.html @@ -82,8 +82,8 @@ if ('onhashchange' in window) { 26 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 3/r32/EBX 0xc/disp8 . # copy *(ESP+12) to EBX 27 # call string_equal(s1, s2) 28 # push args -29 50/push . . . . . . . . # push EAX -30 53/push . . . . . . . . # push EBX +29 50/push-EAX +30 53/push-EBX 31 # call 32 e8/call ascii_difference/disp32 33 # discard args @@ -95,7 +95,7 @@ if ('onhashchange' in window) { 39 40 ascii_difference: # (s1, s2) : null-terminated ascii strings 41 # a = first letter of s1 (ECX) -42 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 0/r32/EAX 8/disp8 # copy *(ESP+8) to EAX +42 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 0/r32/EAX 8/disp8 . # copy *(ESP+8) to EAX 43 8b/copy 0/mod/indirect 0/rm32/EAX . . . 0/r32/EAX . . # copy *EAX to EAX 44 # b = first letter of s2 (EDX) 45 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 1/r32/ECX 4/disp8 # copy *(ESP+4) to ECX -- cgit 1.4.1-2-gfad0