https://github.com/akkartik/mu/blob/master/subx/034compute_segment_address.cc
  1 //: Start allowing us to not specify precise addresses for the start of each
  2 //: segment.
  3 //: This gives up a measure of control in placing code and data.
  4 
  5 void test_segment_name() {
  6   run(
  7       "== code\n"
  8       "05/add-to-EAX  0x0d0c0b0a/imm32\n"
  9       // code starts at 0x08048000 + p_offset, which is 0x54 for a single-segment binary
 10   );
 11   CHECK_TRACE_CONTENTS(
 12       "load: 0x09000054 -> 05\n"
 13       "load: 0x09000055 -> 0a\n"
 14       "load: 0x09000056 -> 0b\n"
 15       "load: 0x09000057 -> 0c\n"
 16       "load: 0x09000058 -> 0d\n"
 17       "run: add imm32 0x0d0c0b0a to reg EAX\n"
 18       "run: storing 0x0d0c0b0a\n"
 19   );
 20 }
 21 
//: Update the parser to handle non-numeric segment names.
 23 //:
 24 //: We'll also support repeated segments with non-numeric names.
 25 
 26 :(before "End Globals")
 27 map</*name*/string, int> Segment_index;
 28 bool Currently_parsing_named_segment = false;  // global to permit cross-layer communication
 29 int Currently_parsing_segment_index = -1;  // global to permit cross-layer communication
 30 :(before "End Reset")
 31 Segment_index.clear();
 32 Currently_parsing_named_segment = false;
 33 Currently_parsing_segment_index = -1;
 34 
 35 :(before "End Segment Parsing Special-cases(segment_title)")
 36 if (!starts_with(segment_title, "0x")) {
 37   Currently_parsing_named_segment = true;
 38   if (!contains_key(Segment_index, segment_title)) {
 39     trace(3, "parse") << "new segment '" << segment_title << "'" << end();
 40     if (out.segments.empty() && segment_title != "code") {
 41       raise << "first segment must be 'code' but is '" << segment_title << "'\n" << end();
 42       return;
 43     }
 44     if (SIZE(out.segments) == 1 && segment_title != "data") {
 45       raise << "second segment must be 'data' but is '" << segment_title << "'\n" << end();
 46       return;
 47     }
 48     put(Segment_index, segment_title, SIZE(out.segments));
 49     out.segments.push_back(segment());
 50   }
 51   else {
 52     trace(3, "parse") << "appending to segment '" << segment_title << "'" << end();
 53   }
 54   Currently_parsing_segment_index = get(Segment_index, segment_title);
 55 }
 56 
 57 :(before "End flush(p, lines) Special-cases")
 58 if (Currently_parsing_named_segment) {
 59   assert(!p.segments.empty());
 60   trace(3, "parse") << "flushing segment" << end();
 61   vector<line>& curr_segment_data = p.segments.at(Currently_parsing_segment_index).lines;
 62   curr_segment_data.insert(curr_segment_data.end(), lines.begin(), lines.end());
 63   lines.clear();
 64   Currently_parsing_named_segment = false;
 65   Currently_parsing_segment_index = -1;
 66   return;
 67 }
 68 
 69 :(code)
 70 void test_repeated_segment_merges_data() {
 71   run(
 72       "== code\n"
 73       "05/add-to-EAX  0x0d0c0b0a/imm32\n"
 74       "== code\n"  // again
 75       "2d/subtract-from-EAX  0xddccbbaa/imm32\n"
 76   );
 77   CHECK_TRACE_CONTENTS(
 78       "parse: new segment 'code'\n"
 79       "parse: appending to segment 'code'\n"
 80       // first segment
 81       "load: 0x09000054 -> 05\n"
 82       "load: 0x09000055 -> 0a\n"
 83       "load: 0x09000056 -> 0b\n"
 84       "load: 0x09000057 -> 0c\n"
 85       "load: 0x09000058 -> 0d\n"
 86       // second segment
 87       "load: 0x09000059 -> 2d\n"
 88       "load: 0x0900005a -> aa\n"
 89       "load: 0x0900005b -> bb\n"
 90       "load: 0x0900005c -> cc\n"
 91       "load: 0x0900005d -> dd\n"
 92   );
 93 }
 94 
 95 void test_error_on_missing_segment_header() {
 96   Hide_errors = true;
 97   run(
 98       "05/add-to-EAX 0/imm32\n"
 99   );
100   CHECK_TRACE_CONTENTS(
101       "error: input does not start with a '==' section header\n"
102   );
103 }
104 
105 void test_error_on_first_segment_not_code() {
106   Hide_errors = true;
107   run(
108       "== data\n"
109       "05 00 00 00 00\n"
110   );
111   CHECK_TRACE_CONTENTS(
112       "error: first segment must be 'code' but is 'data'\n"
113   );
114 }
115 
116 void test_error_on_second_segment_not_data() {
117   Hide_errors = true;
118   run(
119       "== code\n"
120       "05/add-to-EAX 0/imm32\n"
121       "== bss\n"
122       "05 00 00 00 00\n"
123   );
124   CHECK_TRACE_CONTENTS(
125       "error: second segment must be 'data' but is 'bss'\n"
126   );
127 }
128 
129 //: compute segment address
130 
131 :(before "End Level-2 Transforms")
132 Transform.push_back(compute_segment_starts);
133 
134 :(code)
135 void compute_segment_starts(program& p) {
136   trace(3, "transform") << "-- compute segment addresses" << end();
137   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
138   for (size_t i = 0;  i < p.segments.size();  ++i) {
139     segment& curr = p.segments.at(i);
140     if (curr.start == 0) {
141       curr.start = CODE_SEGMENT + i*SPACE_FOR_SEGMENT + p_offset;
142       trace(99, "transform") << "segment " << i << " begins at address 0x" << HEXWORD << curr.start << end();
143     }
144     p_offset += size_of(curr);
145     assert(p_offset < SEGMENT_ALIGNMENT);  // for now we get less and less available space in each successive segment
146   }
147 }
148 
149 uint32_t size_of(const segment& s) {
150   uint32_t sum = 0;
151   for (int i = 0;  i < SIZE(s.lines);  ++i)
152     sum += num_bytes(s.lines.at(i));
153   return sum;
154 }
155 
156 // Assumes all bitfields are packed.
157 uint32_t num_bytes(const line& inst) {
158   uint32_t sum = 0;
159   for (int i = 0;  i < SIZE(inst.words);  ++i)
160     sum += size_of(inst.words.at(i));
161   return sum;
162 }
163 
164 int size_of(const word& w) {
165   if (has_operand_metadata(w, "disp32") || has_operand_metadata(w, "imm32"))
166     return 4;
167   else if (has_operand_metadata(w, "disp16"))
168     return 2;
169   // End size_of(word w) Special-cases
170   else
171     return 1;
172 }
173 
174 //: Dependencies:
175 //: - We'd like to compute segment addresses before setting up global variables,
176 //:   because computing addresses for global variables requires knowing where
177 //:   the data segment starts.
178 //: - We'd like to finish expanding labels before computing segment addresses,
179 //:   because it would make computing the sizes of segments more self-contained
180 //:   (num_bytes).
181 //:
182 //: Decision: compute segment addresses before expanding labels, by being
183 //: aware in this layer of certain operand types that will eventually occupy
184 //: multiple bytes.
185 //:
186 //: The layer to expand labels later hooks into num_bytes() to teach this
187 //: layer that labels occupy zero space in the binary.