https://github.com/akkartik/mu/blob/master/subx/034compute_segment_address.cc
  1 //: Start allowing us to not specify precise addresses for the start of each
  2 //: segment.
  3 //: This gives up a measure of control in placing code and data.
  4 
  5 :(scenario segment_name)
  6 == code
  7 05/add-to-EAX  0x0d0c0b0a/imm32
  8 # code starts at 0x09000000 + p_offset, which is 0x54 for a single-segment binary
  9 +load: 0x09000054 -> 05
 10 +load: 0x09000055 -> 0a
 11 +load: 0x09000056 -> 0b
 12 +load: 0x09000057 -> 0c
 13 +load: 0x09000058 -> 0d
 14 +run: add imm32 0x0d0c0b0a to reg EAX
 15 +run: storing 0x0d0c0b0a
 16 
 17 //: Update the parser to handle non-numeric segment name.
 18 //:
 19 //: We'll also support repeated segments with non-numeric names.
 20 
 21 :(before "End Globals")
 22 map</*name*/string, int> Segment_index;
 23 bool Currently_parsing_named_segment = false;  // global to permit cross-layer communication
 24 int Currently_parsing_segment_index = -1;  // global to permit cross-layer communication
 25 :(before "End Reset")
 26 Segment_index.clear();
 27 Currently_parsing_named_segment = false;
 28 Currently_parsing_segment_index = -1;
 29 
 30 :(before "End Segment Parsing Special-cases(segment_title)")
 31 if (!starts_with(segment_title, "0x")) {
 32   Currently_parsing_named_segment = true;
 33   if (!contains_key(Segment_index, segment_title)) {
 34     trace(3, "parse") << "new segment '" << segment_title << "'" << end();
 35     if (out.segments.empty() && segment_title != "code") {
 36       raise << "first segment must be 'code' but is '" << segment_title << "'\n" << end();
 37       return;
 38     }
 39     if (SIZE(out.segments) == 1 && segment_title != "data") {
 40       raise << "second segment must be 'data' but is '" << segment_title << "'\n" << end();
 41       return;
 42     }
 43     put(Segment_index, segment_title, SIZE(out.segments));
 44     out.segments.push_back(segment());
 45   }
 46   else {
 47     trace(3, "parse") << "appending to segment '" << segment_title << "'" << end();
 48   }
 49   Currently_parsing_segment_index = get(Segment_index, segment_title);
 50 }
 51 
 52 :(before "End flush(p, lines) Special-cases")
 53 if (Currently_parsing_named_segment) {
 54   assert(!p.segments.empty());
 55   trace(3, "parse") << "flushing segment" << end();
 56   vector<line>& curr_segment_data = p.segments.at(Currently_parsing_segment_index).lines;
 57   curr_segment_data.insert(curr_segment_data.end(), lines.begin(), lines.end());
 58   lines.clear();
 59   Currently_parsing_named_segment = false;
 60   Currently_parsing_segment_index = -1;
 61   return;
 62 }
 63 
 64 :(scenario repeated_segment_merges_data)
 65 == code
 66 05/add-to-EAX  0x0d0c0b0a/imm32
 67 == code
 68 2d/subtract-from-EAX  0xddccbbaa/imm32
 69 +parse: new segment 'code'
 70 +parse: appending to segment 'code'
 71 # first segment
 72 +load: 0x09000054 -> 05
 73 +load: 0x09000055 -> 0a
 74 +load: 0x09000056 -> 0b
 75 +load: 0x09000057 -> 0c
 76 +load: 0x09000058 -> 0d
 77 # second segment
 78 +load: 0x09000059 -> 2d
 79 +load: 0x0900005a -> aa
 80 +load: 0x0900005b -> bb
 81 +load: 0x0900005c -> cc
 82 +load: 0x0900005d -> dd
 83 
 84 :(scenario error_on_missing_segment_header)
 85 % Hide_errors = true;
 86 05/add-to-EAX 0/imm32
 87 +error: input does not start with a '==' section header
 88 
 89 :(scenario error_on_first_segment_not_code)
 90 % Hide_errors = true;
 91 == data
 92 05 00 00 00 00
 93 +error: first segment must be 'code' but is 'data'
 94 
 95 :(scenario error_on_second_segment_not_data)
 96 % Hide_errors = true;
 97 == code
 98 05/add-to-EAX 0/imm32
 99 == bss
100 05 00 00 00 00
101 +error: second segment must be 'data' but is 'bss'
102 
103 //: compute segment address
104 
105 :(before "End Level-2 Transforms")
106 Transform.push_back(compute_segment_starts);
107 
108 :(code)
109 void compute_segment_starts(program& p) {
110   trace(3, "transform") << "-- compute segment addresses" << end();
111   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
112   for (size_t i = 0;  i < p.segments.size();  ++i) {
113     segment& curr = p.segments.at(i);
114     if (curr.start == 0) {
115       curr.start = CODE_SEGMENT + i*SPACE_FOR_SEGMENT + p_offset;
116       trace(99, "transform") << "segment " << i << " begins at address 0x" << HEXWORD << curr.start << end();
117     }
118     p_offset += size_of(curr);
119     assert(p_offset < SEGMENT_ALIGNMENT);  // for now we get less and less available space in each successive segment
120   }
121 }
122 
123 uint32_t size_of(const segment& s) {
124   uint32_t sum = 0;
125   for (int i = 0;  i < SIZE(s.lines);  ++i)
126     sum += num_bytes(s.lines.at(i));
127   return sum;
128 }
129 
130 // Assumes all bitfields are packed.
131 uint32_t num_bytes(const line& inst) {
132   uint32_t sum = 0;
133   for (int i = 0;  i < SIZE(inst.words);  ++i)
134     sum += size_of(inst.words.at(i));
135   return sum;
136 }
137 
138 int size_of(const word& w) {
139   if (has_operand_metadata(w, "disp32") || has_operand_metadata(w, "imm32"))
140     return 4;
141   else if (has_operand_metadata(w, "disp16"))
142     return 2;
143   // End size_of(word w) Special-cases
144   else
145     return 1;
146 }
147 
148 //: Dependencies:
149 //: - We'd like to compute segment addresses before setting up global variables,
150 //:   because computing addresses for global variables requires knowing where
151 //:   the data segment starts.
152 //: - We'd like to finish expanding labels before computing segment addresses,
153 //:   because it would make computing the sizes of segments more self-contained
154 //:   (num_bytes).
155 //:
156 //: Decision: compute segment addresses before expanding labels, by being
157 //: aware in this layer of certain operand types that will eventually occupy
158 //: multiple bytes.
159 //:
160 //: The layer to expand labels later hooks into num_bytes() to teach this
161 //: layer that labels occupy zero space in the binary.