Mu - 011run.cc

From 3350c34a74844e21ea69077e01efff3bae64bdcd Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Tue, 23 Mar 2021 17:31:08 -0700 Subject: . --- html/011run.cc.html | 517 ---------------------------------------------------- 1 file changed, 517 deletions(-) delete mode 100644 html/011run.cc.html (limited to 'html/011run.cc.html') diff --git a/html/011run.cc.html b/html/011run.cc.html deleted file mode 100644 index 793d8bf0..00000000 --- a/html/011run.cc.html +++ /dev/null @@ -1,517 +0,0 @@ - - - - -Mu - 011run.cc - - - - - - - - - - -https://github.com/akkartik/mu/blob/main/011run.cc -
-  1 //: Running SubX programs on the VM.
-  2 
-  3 //: (Not to be confused with the 'run' subcommand for running ELF binaries on
-  4 //: the VM. That comes later.)
-  5 
-  6 :(before "End Help Texts")
-  7 put_new(Help, "syntax",
-  8   "SubX programs consist of segments, each segment in turn consisting of lines.\n"
-  9   "Line-endings are significant; each line should contain a single\n"
- 10   "instruction, macro or directive.\n"
- 11   "\n"
- 12   "Comments start with the '#' character. It should be at the start of a word\n"
- 13   "(start of line, or following a space).\n"
- 14   "\n"
- 15   "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
- 16   "the segment and a (sometimes approximate) starting address in memory.\n"
- 17   "The name 'code' is special; instructions to execute should always go here.\n"
- 18   "\n"
- 19   "The resulting binary starts running code from a label called 'Entry'\n"
- 20   "in the code segment.\n"
- 21   "\n"
- 22   "Segments with the same name get merged together. This rule helps keep functions\n"
- 23   "and their data close together in .subx files.\n"
- 24   "You don't have to specify the starting address after the first time.\n"
- 25   "\n"
- 26   "Lines consist of a series of words. Words can contain arbitrary metadata\n"
- 27   "after a '/', but they can never contain whitespace. Metadata has no effect\n"
- 28   "at runtime, but can be handy when rewriting macros.\n"
- 29   "\n"
- 30   "Check out the example programs in the apps/ directory, particularly apps/ex*.\n"
- 31 );
- 32 :(before "End Help Contents")
- 33 cerr << "  syntax\n";
- 34 
- 35 :(code)
- 36 void test_copy_imm32_to_EAX() {
- 37   // At the lowest level, SubX programs are a series of hex bytes, each
- 38   // (variable-length) instruction on one line.
- 39   run(
- 40       // Comments start with '#' and are ignored.
- 41       "# comment\n"
- 42       // Segment headers start with '==', a name and a starting hex address.
- 43       // There's usually one code and one data segment. The code segment
- 44       // always comes first.
- 45       "== code 0x1\n"  // code segment
- 46 
- 47       // After the header, each segment consists of lines, and each line
- 48       // consists of words separated by whitespace.
- 49       //
- 50       // All words can have metadata after a '/'. No spaces allowed in
- 51       // metadata, of course.
- 52       // Unrecognized metadata never causes errors, so you can use it for
- 53       // documentation.
- 54       //
- 55       // Within the code segment in particular, x86 instructions consist of
- 56       // some number of the following parts and sub-parts (see the Readme and
- 57       // cheatsheet.pdf for details):
- 58       //   opcodes: 1-3 bytes
- 59       //   ModR/M byte
- 60       //   SIB byte
- 61       //   displacement: 0/1/2/4 bytes
- 62       //   immediate: 0/1/2/4 bytes
- 63       // opcode        ModR/M                    SIB                   displacement    immediate
- 64       // instruction   mod, reg, Reg/Mem bits    scale, index, base
- 65       // 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
- 66       "  b8            .                         .                     .               0a 0b 0c 0d\n"  // copy 0x0d0c0b0a to EAX
- 67       // The periods are just to help the eye track long gaps between columns,
- 68       // and are otherwise ignored.
- 69   );
- 70   // This program, when run, causes the following events in the trace:
- 71   CHECK_TRACE_CONTENTS(
- 72       "load: 0x00000001 -> b8\n"
- 73       "load: 0x00000002 -> 0a\n"
- 74       "load: 0x00000003 -> 0b\n"
- 75       "load: 0x00000004 -> 0c\n"
- 76       "load: 0x00000005 -> 0d\n"
- 77       "run: copy imm32 0x0d0c0b0a to EAX\n"
- 78   );
- 79 }
- 80 
- 81 // top-level helper for tests: parse the input, load the hex bytes into memory, run
- 82 void run(const string& text_bytes) {
- 83   program p;
- 84   istringstream in(text_bytes);
- 85   // Loading Test Program
- 86   parse(in, p);
- 87   if (trace_contains_errors()) return;  // if any stage raises errors, stop immediately
- 88   // Running Test Program
- 89   load(p);
- 90   if (trace_contains_errors()) return;
- 91   // convenience to keep tests concise: 'Entry' label need not be provided
- 92   // not allowed in real programs
- 93   if (p.entry)
- 94     EIP = p.entry;
- 95   else
- 96     EIP = find(p, "code")->start;
- 97   while (EIP < End_of_program)
- 98     run_one_instruction();
- 99 }
-100 
-101 //:: core data structures
-102 
-103 :(before "End Types")
-104 struct program {
-105   uint32_t entry;
-106   vector<segment> segments;
-107   program() { entry = 0; }
-108 };
-109 :(before "struct program")
-110 struct segment {
-111   string name;
-112   uint32_t start;
-113   vector<line> lines;
-114   // End segment Fields
-115   segment() {
-116     start = 0;
-117     // End segment Constructor
-118   }
-119 };
-120 :(before "struct segment")
-121 struct line {
-122   vector<word> words;
-123   vector<string> metadata;
-124   string original;
-125 };
-126 :(before "struct line")
-127 struct word {
-128   string original;
-129   string data;
-130   vector<string> metadata;
-131 };
-132 
-133 //:: parse
-134 
-135 :(code)
-136 void parse(istream& fin, program& out) {
-137   segment* curr_segment = NULL;
-138   vector<line> l;
-139   while (has_data(fin)) {
-140     string line_data;
-141     line curr;
-142     getline(fin, line_data);
-143     curr.original = line_data;
-144     trace(99, "parse") << "line: " << line_data << end();
-145     // End Line Parsing Special-cases(line_data -> l)
-146     istringstream lin(line_data);
-147     while (has_data(lin)) {
-148       string word_data;
-149       lin >> word_data;
-150       if (word_data.empty()) continue;
-151       if (word_data[0] == '#') break;  // comment
-152       if (word_data == ".") continue;  // comment token
-153       if (word_data == "==") {
-154         flush(curr_segment, l);
-155         string segment_name;
-156         lin >> segment_name;
-157         curr_segment = find(out, segment_name);
-158         if (curr_segment != NULL) {
-159           trace(3, "parse") << "appending to segment '" << segment_name << "'" << end();
-160         }
-161         else {
-162           trace(3, "parse") << "new segment '" << segment_name << "'" << end();
-163           uint32_t seg_start = 0;
-164           lin >> std::hex >> seg_start;
-165           sanity_check_program_segment(out, seg_start);
-166           out.segments.push_back(segment());
-167           curr_segment = &out.segments.back();
-168           curr_segment->name = segment_name;
-169           curr_segment->start = seg_start;
-170           if (trace_contains_errors()) continue;
-171           trace(3, "parse") << "starts at address 0x" << HEXWORD << curr_segment->start << end();
-172         }
-173         break;  // skip rest of line
-174       }
-175       if (word_data[0] == ':') {
-176         // todo: line metadata
-177         break;
-178       }
-179       curr.words.push_back(word());
-180       parse_word(word_data, curr.words.back());
-181       trace(99, "parse") << "word: " << to_string(curr.words.back());
-182     }
-183     if (!curr.words.empty())
-184       l.push_back(curr);
-185   }
-186   flush(curr_segment, l);
-187   trace(99, "parse") << "done" << end();
-188 }
-189 
-190 segment* find(program& p, const string& segment_name) {
-191   for (int i = 0;  i < SIZE(p.segments);  ++i) {
-192     if (p.segments.at(i).name == segment_name)
-193       return &p.segments.at(i);
-194   }
-195   return NULL;
-196 }
-197 
-198 void flush(segment* s, vector<line>& lines) {
-199   if (lines.empty()) return;
-200   if (s == NULL) {
-201     raise << "input does not start with a '==' section header\n" << end();
-202     return;
-203   }
-204   trace(3, "parse") << "flushing segment" << end();
-205   s->lines.insert(s->lines.end(), lines.begin(), lines.end());
-206   lines.clear();
-207 }
-208 
-209 void parse_word(const string& data, word& out) {
-210   out.original = data;
-211   istringstream win(data);
-212   if (getline(win, out.data, '/')) {
-213     string m;
-214     while (getline(win, m, '/'))
-215       out.metadata.push_back(m);
-216   }
-217 }
-218 
-219 void sanity_check_program_segment(const program& p, uint32_t addr) {
-220   for (int i = 0;  i < SIZE(p.segments);  ++i) {
-221     if (p.segments.at(i).start == addr)
-222       raise << "can't have multiple segments starting at address 0x" << HEXWORD << addr << '\n' << end();
-223   }
-224 }
-225 
-226 // helper for tests
-227 void parse(const string& text_bytes) {
-228   program p;
-229   istringstream in(text_bytes);
-230   parse(in, p);
-231 }
-232 
-233 void test_detect_duplicate_segments() {
-234   Hide_errors = true;
-235   parse(
-236       "== segment1 0xee\n"
-237       "ab\n"
-238       "== segment2 0xee\n"
-239       "cd\n"
-240   );
-241   CHECK_TRACE_CONTENTS(
-242       "error: can't have multiple segments starting at address 0x000000ee\n"
-243   );
-244 }
-245 
-246 //:: load
-247 
-248 void load(const program& p) {
-249   if (find(p, "code") == NULL) {
-250     raise << "no code to run\n" << end();
-251     return;
-252   }
-253   // Ensure segments are disjoint.
-254   set<uint32_t> overlap;
-255   for (int i = 0;   i < SIZE(p.segments);  ++i) {
-256     const segment& seg = p.segments.at(i);
-257     uint32_t addr = seg.start;
-258     if (!already_allocated(addr))
-259       Mem.push_back(vma(seg.start));
-260     trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
-261     for (int j = 0;  j < SIZE(seg.lines);  ++j) {
-262       const line& l = seg.lines.at(j);
-263       for (int k = 0;  k < SIZE(l.words);  ++k) {
-264         const word& w = l.words.at(k);
-265         uint8_t val = hex_byte(w.data);
-266         if (trace_contains_errors()) return;
-267         assert(overlap.find(addr) == overlap.end());
-268         write_mem_u8(addr, val);
-269         overlap.insert(addr);
-270         trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
-271         ++addr;
-272       }
-273     }
-274     if (seg.name == "code") {
-275       End_of_program = addr;
-276     }
-277   }
-278 }
-279 
-280 const segment* find(const program& p, const string& segment_name) {
-281   for (int i = 0;  i < SIZE(p.segments);  ++i) {
-282     if (p.segments.at(i).name == segment_name)
-283       return &p.segments.at(i);
-284   }
-285   return NULL;
-286 }
-287 
-288 uint8_t hex_byte(const string& s) {
-289   if (contains_uppercase(s)) {
-290     raise << "uppercase hex not allowed: " << s << '\n' << end();
-291     return 0;
-292   }
-293   istringstream in(s);
-294   int result = 0;
-295   in >> std::hex >> result;
-296   if (!in || !in.eof()) {
-297     raise << "token '" << s << "' is not a hex byte\n" << end();
-298     return '\0';
-299   }
-300   if (result > 0xff || result < -0x8f) {
-301     raise << "token '" << s << "' is not a hex byte\n" << end();
-302     return '\0';
-303   }
-304   return static_cast<uint8_t>(result);
-305 }
-306 
-307 void test_number_too_large() {
-308   Hide_errors = true;
-309   parse_and_load(
-310       "== code 0x1\n"
-311       "01 cab\n"
-312   );
-313   CHECK_TRACE_CONTENTS(
-314       "error: token 'cab' is not a hex byte\n"
-315   );
-316 }
-317 
-318 void test_invalid_hex() {
-319   Hide_errors = true;
-320   parse_and_load(
-321       "== code 0x1\n"
-322       "01 cx\n"
-323   );
-324   CHECK_TRACE_CONTENTS(
-325       "error: token 'cx' is not a hex byte\n"
-326   );
-327 }
-328 
-329 void test_negative_number() {
-330   parse_and_load(
-331       "== code 0x1\n"
-332       "01 -02\n"
-333   );
-334   CHECK_TRACE_COUNT("error", 0);
-335 }
-336 
-337 void test_negative_number_too_small() {
-338   Hide_errors = true;
-339   parse_and_load(
-340       "== code 0x1\n"
-341       "01 -12345\n"
-342   );
-343   CHECK_TRACE_CONTENTS(
-344       "error: token '-12345' is not a hex byte\n"
-345   );
-346 }
-347 
-348 void test_hex_prefix() {
-349   parse_and_load(
-350       "== code 0x1\n"
-351       "0x01 -0x02\n"
-352   );
-353   CHECK_TRACE_COUNT("error", 0);
-354 }
-355 
-356 void test_repeated_segment_merges_data() {
-357   parse_and_load(
-358       "== code 0x1\n"
-359       "11 22\n"
-360       "== code\n"  // again
-361       "33 44\n"
-362   );
-363   CHECK_TRACE_CONTENTS(
-364       "parse: new segment 'code'\n"
-365       "parse: appending to segment 'code'\n"
-366       // first segment
-367       "load: 0x00000001 -> 11\n"
-368       "load: 0x00000002 -> 22\n"
-369       // second segment
-370       "load: 0x00000003 -> 33\n"
-371       "load: 0x00000004 -> 44\n"
-372   );
-373 }
-374 
-375 void test_error_on_missing_segment_header() {
-376   Hide_errors = true;
-377   parse_and_load(
-378       "01 02\n"
-379   );
-380   CHECK_TRACE_CONTENTS(
-381       "error: input does not start with a '==' section header\n"
-382   );
-383 }
-384 
-385 void test_error_on_uppercase_hex() {
-386   Hide_errors = true;
-387   parse_and_load(
-388       "== code\n"
-389       "01 Ab\n"
-390   );
-391   CHECK_TRACE_CONTENTS(
-392       "error: uppercase hex not allowed: Ab\n"
-393   );
-394 }
-395 
-396 //: helper for tests
-397 void parse_and_load(const string& text_bytes) {
-398   program p;
-399   istringstream in(text_bytes);
-400   parse(in, p);
-401   if (trace_contains_errors()) return;  // if any stage raises errors, stop immediately
-402   load(p);
-403 }
-404 
-405 //:: run
-406 
-407 :(before "End Initialize Op Names")
-408 put_new(Name, "b8", "copy imm32 to EAX (mov)");
-409 
-410 //: our first opcode
-411 
-412 :(before "End Single-Byte Opcodes")
-413 case 0xb8: {  // copy imm32 to EAX
-414   const int32_t src = next32();
-415   trace(Callstack_depth+1, "run") << "copy imm32 0x" << HEXWORD << src << " to EAX" << end();
-416   Reg[EAX].i = src;
-417   break;
-418 }
-419 
-420 :(code)
-421 void test_copy_imm32_to_EAX_again() {
-422   run(
-423       "== code 0x1\n"  // code segment
-424       // op     ModR/M  SIB   displacement  immediate
-425       "  b8                                 0a 0b 0c 0d \n"  // copy 0x0d0c0b0a to EAX
-426   );
-427   CHECK_TRACE_CONTENTS(
-428       "run: copy imm32 0x0d0c0b0a to EAX\n"
-429   );
-430 }
-431 
-432 // read a 32-bit int in little-endian order from the instruction stream
-433 int32_t next32() {
-434   int32_t result = read_mem_i32(EIP);
-435   EIP+=4;
-436   return result;
-437 }
-438 
-439 //:: helpers
-440 
-441 string to_string(const word& w) {
-442   ostringstream out;
-443   out << w.data;
-444   for (int i = 0;  i < SIZE(w.metadata);  ++i)
-445     out << " /" << w.metadata.at(i);
-446   return out.str();
-447 }
-448 
-449 bool contains_uppercase(const string& s) {
-450   for (int i = 0;  i < SIZE(s);  ++i)
-451     if (isupper(s.at(i))) return true;
-452   return false;
-453 }
-
- - - -- cgit 1.4.1-2-gfad0