Mu - subx/011run.cc

From 6e1eeeebfb453fa7c871869c19375ce60fbd7413 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sat, 27 Jul 2019 16:01:55 -0700 Subject: 5485 - promote SubX to top-level --- html/subx/011run.cc.html | 541 ----------------------------------------------- 1 file changed, 541 deletions(-) delete mode 100644 html/subx/011run.cc.html (limited to 'html/subx/011run.cc.html') diff --git a/html/subx/011run.cc.html b/html/subx/011run.cc.html deleted file mode 100644 index e6b2eb21..00000000 --- a/html/subx/011run.cc.html +++ /dev/null @@ -1,541 +0,0 @@ - - - - -Mu - subx/011run.cc - - - - - - - - - - -https://github.com/akkartik/mu/blob/master/subx/011run.cc -
-  1 //: Running SubX programs on the VM.
-  2 
-  3 //: (Not to be confused with the 'run' subcommand for running ELF binaries on
-  4 //: the VM. That comes later.)
-  5 
-  6 :(before "End Help Texts")
-  7 put_new(Help, "syntax",
-  8   "SubX programs consist of segments, each segment in turn consisting of lines.\n"
-  9   "Line-endings are significant; each line should contain a single\n"
- 10   "instruction, macro or directive.\n"
- 11   "\n"
- 12   "Comments start with the '#' character. It should be at the start of a word\n"
- 13   "(start of line, or following a space).\n"
- 14   "\n"
- 15   "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
- 16   "the segment and a (sometimes approximate) starting address in memory.\n"
- 17   "The name 'code' is special; instructions to execute should always go here.\n"
- 18   "\n"
- 19   "The resulting binary starts running from the start of the segment by default.\n"
- 20   "To start elsewhere in the code segment, define a special label called 'Entry'.\n"
- 21   "\n"
- 22   "Segments with the same name get merged together. This rule helps keep functions and\n"
- 23   "their data close together in .subx files.\n"
- 24   "You don't have to specify the starting address after the first time.\n"
- 25   "\n"
- 26   "Lines consist of a series of words. Words can contain arbitrary metadata\n"
- 27   "after a '/', but they can never contain whitespace. Metadata has no effect\n"
- 28   "at runtime, but can be handy when rewriting macros.\n"
- 29   "\n"
- 30   "Check out the examples in the examples/ directory.\n"
- 31   "Programming in machine code can be annoying, but let's see if we can make\n"
- 32   "it nice enough to be able to write a compiler in it.\n"
- 33 );
- 34 :(before "End Help Contents")
- 35 cerr << "  syntax\n";
- 36 
- 37 :(code)
- 38 void test_copy_imm32_to_EAX() {
- 39   // At the lowest level, SubX programs are a series of hex bytes, each
- 40   // (variable-length) instruction on one line.
- 41   run(
- 42       // Comments start with '#' and are ignored.
- 43       "# comment\n"
- 44       // Segment headers start with '==', a name and a starting hex address.
- 45       // There's usually one code and one data segment. The code segment
- 46       // always comes first.
- 47       "== code 0x1\n"  // code segment
- 48 
- 49       // After the header, each segment consists of lines, and each line
- 50       // consists of words separated by whitespace.
- 51       //
- 52       // All words can have metadata after a '/'. No spaces allowed in
- 53       // metadata, of course.
- 54       // Unrecognized metadata never causes errors, so you can use it for
- 55       // documentation.
- 56       //
- 57       // Within the code segment in particular, x86 instructions consist of
- 58       // some number of the following parts and sub-parts (see the Readme and
- 59       // cheatsheet.pdf for details):
- 60       //   opcodes: 1-3 bytes
- 61       //   ModR/M byte
- 62       //   SIB byte
- 63       //   displacement: 0/1/2/4 bytes
- 64       //   immediate: 0/1/2/4 bytes
- 65       // opcode        ModR/M                    SIB                   displacement    immediate
- 66       // instruction   mod, reg, Reg/Mem bits    scale, index, base
- 67       // 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
- 68       "  b8            .                         .                     .               0a 0b 0c 0d\n"  // copy 0x0d0c0b0a to EAX
- 69       // The periods are just to help the eye track long gaps between columns,
- 70       // and are otherwise ignored.
- 71   );
- 72   // This program, when run, causes the following events in the trace:
- 73   CHECK_TRACE_CONTENTS(
- 74       "load: 0x00000001 -> b8\n"
- 75       "load: 0x00000002 -> 0a\n"
- 76       "load: 0x00000003 -> 0b\n"
- 77       "load: 0x00000004 -> 0c\n"
- 78       "load: 0x00000005 -> 0d\n"
- 79       "run: copy imm32 0x0d0c0b0a to EAX\n"
- 80   );
- 81 }
- 82 
- 83 // top-level helper for scenarios: parse the input, transform any macros, load
- 84 // the final hex bytes into memory, run it
- 85 void run(const string& text_bytes) {
- 86   program p;
- 87   istringstream in(text_bytes);
- 88   parse(in, p);
- 89   if (trace_contains_errors()) return;  // if any stage raises errors, stop immediately
- 90   transform(p);
- 91   if (trace_contains_errors()) return;
- 92   load(p);
- 93   if (trace_contains_errors()) return;
- 94   while (EIP < End_of_program)
- 95     run_one_instruction();
- 96 }
- 97 
- 98 //:: core data structures
- 99 
-100 :(before "End Types")
-101 struct program {
-102   vector<segment> segments;
-103   // random ideas for other things we may eventually need
-104   //map<name, address> globals;
-105   //vector<recipe> recipes;
-106   //map<string, type_info> types;
-107 };
-108 :(before "struct program")
-109 struct segment {
-110   string name;
-111   uint32_t start;
-112   vector<line> lines;
-113   // End segment Fields
-114   segment() {
-115     start = 0;
-116     // End segment Constructor
-117   }
-118 };
-119 :(before "struct segment")
-120 struct line {
-121   vector<word> words;
-122   vector<string> metadata;
-123   string original;
-124 };
-125 :(before "struct line")
-126 struct word {
-127   string original;
-128   string data;
-129   vector<string> metadata;
-130 };
-131 
-132 //:: parse
-133 
-134 :(code)
-135 void parse(istream& fin, program& out) {
-136   segment* curr_segment = NULL;
-137   vector<line> l;
-138   while (has_data(fin)) {
-139     string line_data;
-140     line curr;
-141     getline(fin, line_data);
-142     curr.original = line_data;
-143     trace(99, "parse") << "line: " << line_data << end();
-144     // End Line Parsing Special-cases(line_data -> l)
-145     istringstream lin(line_data);
-146     while (has_data(lin)) {
-147       string word_data;
-148       lin >> word_data;
-149       if (word_data.empty()) continue;
-150       if (word_data[0] == '#') break;  // comment
-151       if (word_data == ".") continue;  // comment token
-152       if (word_data == "==") {
-153         flush(curr_segment, l);
-154         string segment_name;
-155         lin >> segment_name;
-156         curr_segment = find(out, segment_name);
-157         if (curr_segment != NULL) {
-158           trace(3, "parse") << "appending to segment '" << segment_name << "'" << end();
-159         }
-160         else {
-161           trace(3, "parse") << "new segment '" << segment_name << "'" << end();
-162           uint32_t seg_start = 0;
-163           lin >> std::hex >> seg_start;
-164           sanity_check_program_segment(out, seg_start);
-165           out.segments.push_back(segment());
-166           curr_segment = &out.segments.back();
-167           curr_segment->name = segment_name;
-168           curr_segment->start = seg_start;
-169           if (trace_contains_errors()) continue;
-170           trace(3, "parse") << "starts at address 0x" << HEXWORD << curr_segment->start << end();
-171         }
-172         break;  // skip rest of line
-173       }
-174       if (word_data[0] == ':') {
-175         // todo: line metadata
-176         break;
-177       }
-178       curr.words.push_back(word());
-179       parse_word(word_data, curr.words.back());
-180       trace(99, "parse") << "word: " << to_string(curr.words.back());
-181     }
-182     if (!curr.words.empty())
-183       l.push_back(curr);
-184   }
-185   flush(curr_segment, l);
-186   trace(99, "parse") << "done" << end();
-187 }
-188 
-189 segment* find(program& p, const string& segment_name) {
-190   for (int i = 0;  i < SIZE(p.segments);  ++i) {
-191     if (p.segments.at(i).name == segment_name)
-192       return &p.segments.at(i);
-193   }
-194   return NULL;
-195 }
-196 
-197 void flush(segment* s, vector<line>& lines) {
-198   if (lines.empty()) return;
-199   if (s == NULL) {
-200     raise << "input does not start with a '==' section header\n" << end();
-201     return;
-202   }
-203   trace(3, "parse") << "flushing segment" << end();
-204   s->lines.insert(s->lines.end(), lines.begin(), lines.end());
-205   lines.clear();
-206 }
-207 
-208 void parse_word(const string& data, word& out) {
-209   out.original = data;
-210   istringstream win(data);
-211   if (getline(win, out.data, '/')) {
-212     string m;
-213     while (getline(win, m, '/'))
-214       out.metadata.push_back(m);
-215   }
-216 }
-217 
-218 void sanity_check_program_segment(const program& p, uint32_t addr) {
-219   for (int i = 0;  i < SIZE(p.segments);  ++i) {
-220     if (p.segments.at(i).start == addr)
-221       raise << "can't have multiple segments starting at address 0x" << HEXWORD << addr << '\n' << end();
-222   }
-223 }
-224 
-225 // helper for tests
-226 void parse(const string& text_bytes) {
-227   program p;
-228   istringstream in(text_bytes);
-229   parse(in, p);
-230 }
-231 
-232 void test_detect_duplicate_segments() {
-233   Hide_errors = true;
-234   parse(
-235       "== segment1 0xee\n"
-236       "ab\n"
-237       "== segment2 0xee\n"
-238       "cd\n"
-239   );
-240   CHECK_TRACE_CONTENTS(
-241       "error: can't have multiple segments starting at address 0x000000ee\n"
-242   );
-243 }
-244 
-245 //:: transform
-246 
-247 :(before "End Types")
-248 typedef void (*transform_fn)(program&);
-249 :(before "End Globals")
-250 vector<transform_fn> Transform;
-251 
-252 :(code)
-253 void transform(program& p) {
-254   for (int t = 0;  t < SIZE(Transform);  ++t)
-255     (*Transform.at(t))(p);
-256 }
-257 
-258 //:: load
-259 
-260 void load(const program& p) {
-261   if (find(p, "code") == NULL) {
-262     raise << "no code to run\n" << end();
-263     return;
-264   }
-265   // Ensure segments are disjoint.
-266   set<uint32_t> overlap;
-267   for (int i = 0;   i < SIZE(p.segments);  ++i) {
-268     const segment& seg = p.segments.at(i);
-269     uint32_t addr = seg.start;
-270     if (!already_allocated(addr))
-271       Mem.push_back(vma(seg.start));
-272     trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
-273     for (int j = 0;  j < SIZE(seg.lines);  ++j) {
-274       const line& l = seg.lines.at(j);
-275       for (int k = 0;  k < SIZE(l.words);  ++k) {
-276         const word& w = l.words.at(k);
-277         uint8_t val = hex_byte(w.data);
-278         if (trace_contains_errors()) return;
-279         assert(overlap.find(addr) == overlap.end());
-280         write_mem_u8(addr, val);
-281         overlap.insert(addr);
-282         trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
-283         ++addr;
-284       }
-285     }
-286     if (seg.name == "code") {
-287       End_of_program = addr;
-288       EIP = seg.start;
-289       // End Initialize EIP
-290     }
-291   }
-292 }
-293 
-294 const segment* find(const program& p, const string& segment_name) {
-295   for (int i = 0;  i < SIZE(p.segments);  ++i) {
-296     if (p.segments.at(i).name == segment_name)
-297       return &p.segments.at(i);
-298   }
-299   return NULL;
-300 }
-301 
-302 uint8_t hex_byte(const string& s) {
-303   istringstream in(s);
-304   int result = 0;
-305   in >> std::hex >> result;
-306   if (!in || !in.eof()) {
-307     raise << "token '" << s << "' is not a hex byte\n" << end();
-308     return '\0';
-309   }
-310   if (result > 0xff || result < -0x8f) {
-311     raise << "token '" << s << "' is not a hex byte\n" << end();
-312     return '\0';
-313   }
-314   return static_cast<uint8_t>(result);
-315 }
-316 
-317 void test_number_too_large() {
-318   Hide_errors = true;
-319   parse_and_load(
-320       "== code 0x1\n"
-321       "01 cab\n"
-322   );
-323   CHECK_TRACE_CONTENTS(
-324       "error: token 'cab' is not a hex byte\n"
-325   );
-326 }
-327 
-328 void test_invalid_hex() {
-329   Hide_errors = true;
-330   parse_and_load(
-331       "== code 0x1\n"
-332       "01 cx\n"
-333   );
-334   CHECK_TRACE_CONTENTS(
-335       "error: token 'cx' is not a hex byte\n"
-336   );
-337 }
-338 
-339 void test_negative_number() {
-340   parse_and_load(
-341       "== code 0x1\n"
-342       "01 -02\n"
-343   );
-344   CHECK_TRACE_COUNT("error", 0);
-345 }
-346 
-347 void test_negative_number_too_small() {
-348   Hide_errors = true;
-349   parse_and_load(
-350       "== code 0x1\n"
-351       "01 -12345\n"
-352   );
-353   CHECK_TRACE_CONTENTS(
-354       "error: token '-12345' is not a hex byte\n"
-355   );
-356 }
-357 
-358 void test_hex_prefix() {
-359   parse_and_load(
-360       "== code 0x1\n"
-361       "0x01 -0x02\n"
-362   );
-363   CHECK_TRACE_COUNT("error", 0);
-364 }
-365 
-366 void test_repeated_segment_merges_data() {
-367   parse_and_load(
-368       "== code 0x1\n"
-369       "11 22\n"
-370       "== code\n"  // again
-371       "33 44\n"
-372   );
-373   CHECK_TRACE_CONTENTS(
-374       "parse: new segment 'code'\n"
-375       "parse: appending to segment 'code'\n"
-376       // first segment
-377       "load: 0x00000001 -> 11\n"
-378       "load: 0x00000002 -> 22\n"
-379       // second segment
-380       "load: 0x00000003 -> 33\n"
-381       "load: 0x00000004 -> 44\n"
-382   );
-383 }
-384 
-385 void test_error_on_missing_segment_header() {
-386   Hide_errors = true;
-387   parse_and_load(
-388       "01 02\n"
-389   );
-390   CHECK_TRACE_CONTENTS(
-391       "error: input does not start with a '==' section header\n"
-392   );
-393 }
-394 
-395 //: helper for tests
-396 void parse_and_load(const string& text_bytes) {
-397   program p;
-398   istringstream in(text_bytes);
-399   parse(in, p);
-400   if (trace_contains_errors()) return;  // if any stage raises errors, stop immediately
-401   load(p);
-402 }
-403 
-404 //:: run
-405 
-406 :(before "End Initialize Op Names")
-407 put_new(Name, "b8", "copy imm32 to EAX (mov)");
-408 
-409 //: our first opcode
-410 
-411 :(before "End Single-Byte Opcodes")
-412 case 0xb8: {  // copy imm32 to EAX
-413   const int32_t src = next32();
-414   trace(Callstack_depth+1, "run") << "copy imm32 0x" << HEXWORD << src << " to EAX" << end();
-415   Reg[EAX].i = src;
-416   break;
-417 }
-418 
-419 :(code)
-420 void test_copy_imm32_to_EAX_again() {
-421   run(
-422       "== code 0x1\n"  // code segment
-423       // op     ModR/M  SIB   displacement  immediate
-424       "  b8                                 0a 0b 0c 0d \n"  // copy 0x0d0c0b0a to EAX
-425   );
-426   CHECK_TRACE_CONTENTS(
-427       "run: copy imm32 0x0d0c0b0a to EAX\n"
-428   );
-429 }
-430 
-431 // read a 32-bit int in little-endian order from the instruction stream
-432 int32_t next32() {
-433   int32_t result = read_mem_i32(EIP);
-434   EIP+=4;
-435   return result;
-436 }
-437 
-438 //:: helpers
-439 
-440 string to_string(const word& w) {
-441   ostringstream out;
-442   out << w.data;
-443   for (int i = 0;  i < SIZE(w.metadata);  ++i)
-444     out << " /" << w.metadata.at(i);
-445   return out.str();
-446 }
-447 
-448 int32_t parse_int(const string& s) {
-449   if (s.empty()) return 0;
-450   istringstream in(s);
-451   in >> std::hex;
-452   if (s.at(0) == '-') {
-453     int32_t result = 0;
-454     in >> result;
-455     if (!in || !in.eof()) {
-456       raise << "not a number: " << s << '\n' << end();
-457       return 0;
-458     }
-459     return result;
-460   }
-461   uint32_t uresult = 0;
-462   in >> uresult;
-463   if (!in || !in.eof()) {
-464     raise << "not a number: " << s << '\n' << end();
-465     return 0;
-466   }
-467   return static_cast<int32_t>(uresult);
-468 }
-469 :(before "End Unit Tests")
-470 void test_parse_int() {
-471   CHECK_EQ(0, parse_int("0"));
-472   CHECK_EQ(0, parse_int("0x0"));
-473   CHECK_EQ(0, parse_int("0x0"));
-474   CHECK_EQ(16, parse_int("10"));  // hex always
-475   CHECK_EQ(-1, parse_int("-1"));
-476   CHECK_EQ(-1, parse_int("0xffffffff"));
-477 }
-
- - - -- cgit 1.4.1-2-gfad0