1 //: Start allowing us to not specify precise addresses for the start of each
  2 //: segment.
  3 //: This gives up a measure of control in placing code and data.
  4 
  5 :(scenario segment_name)
  6 == code
  7 05/add 0x0d0c0b0a/imm32  # add 0x0d0c0b0a to EAX
  8 # code starts at 0x08048000 + p_offset, which is 0x54 for a single-segment binary
  9 +load: 0x08048054 -> 05
 10 +load: 0x08048055 -> 0a
 11 +load: 0x08048056 -> 0b
 12 +load: 0x08048057 -> 0c
 13 +load: 0x08048058 -> 0d
 14 +run: add imm32 0x0d0c0b0a to reg EAX
 15 +run: storing 0x0d0c0b0a
 16 
//: Update the parser to handle non-numeric segment names.
 18 //:
 19 //: We'll also support repeated segments with non-numeric names.
 20 //: When we encounter a new reference to an existing segment we'll *prepend*
 21 //: the new data to existing data for the segment.
 22 
 23 :(before "End Globals")
 24 map</*name*/string, int> Segment_index;
 25 bool Currently_parsing_named_segment = false;  // global to permit cross-layer communication
 26 int Currently_parsing_segment_index = -1;  // global to permit cross-layer communication
 27 :(before "End Reset")
 28 Segment_index.clear();
 29 Currently_parsing_named_segment = false;
 30 Currently_parsing_segment_index = -1;
 31 
 32 :(before "End Segment Parsing Special-cases(segment_title)")
 33 if (!starts_with(segment_title, "0x")) {
 34   Currently_parsing_named_segment = true;
 35   if (!contains_key(Segment_index, segment_title)) {
 36     trace(99, "parse") << "new segment '" << segment_title << "'" << end();
 37     if (segment_title == "code")
 38       put(Segment_index, segment_title, 0);
 39     else if (segment_title == "data")
 40       put(Segment_index, segment_title, 1);
 41     else
 42       put(Segment_index, segment_title, max(2, SIZE(out.segments)));
 43     out.segments.push_back(segment());
 44   }
 45   else {
 46     trace(99, "parse") << "prepending to segment '" << segment_title << "'" << end();
 47   }
 48   Currently_parsing_segment_index = get(Segment_index, segment_title);
 49 }
 50 
 51 :(before "End flush(p, lines) Special-cases")
 52 if (Currently_parsing_named_segment) {
 53   if (p.segments.empty() || Currently_parsing_segment_index < 0) {
 54     raise << "input does not start with a '==' section header\n" << end();
 55     return;
 56   }
 57   trace(99, "parse") << "flushing to segment" << end();
 58   vector<line>& curr_segment_data = p.segments.at(Currently_parsing_segment_index).lines;
 59   curr_segment_data.insert(curr_segment_data.begin(), lines.begin(), lines.end());
 60   lines.clear();
 61   Currently_parsing_named_segment = false;
 62   Currently_parsing_segment_index = -1;
 63   return;
 64 }
 65 
 66 :(scenario repeated_segment_merges_data)
 67 == code
 68 05/add 0x0d0c0b0a/imm32  # add 0x0d0c0b0a to EAX
 69 == code
 70 2d/subtract 0xddccbbaa/imm32  # subtract 0xddccbbaa from EAX
 71 +parse: new segment 'code'
 72 +parse: prepending to segment 'code'
 73 +load: 0x08048054 -> 2d
 74 +load: 0x08048055 -> aa
 75 +load: 0x08048056 -> bb
 76 +load: 0x08048057 -> cc
 77 +load: 0x08048058 -> dd
 78 +load: 0x08048059 -> 05
 79 +load: 0x0804805a -> 0a
 80 +load: 0x0804805b -> 0b
 81 +load: 0x0804805c -> 0c
 82 +load: 0x0804805d -> 0d
 83 
 84 //: compute segment address
 85 
 86 :(before "End Level-2 Transforms")
 87 Transform.push_back(compute_segment_starts);
 88 
 89 :(code)
 90 void compute_segment_starts(program& p) {
 91   trace(99, "transform") << "-- compute segment addresses" << end();
 92   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
 93   for (size_t i = 0;  i < p.segments.size();  ++i) {
 94     segment& curr = p.segments.at(i);
 95     if (curr.start == 0) {
 96       curr.start = CODE_START + i*SEGMENT_SIZE + p_offset;
 97       trace(99, "transform") << "segment " << i << " begins at address 0x" << HEXWORD << curr.start << end();
 98     }
 99     p_offset += size_of(curr);
100     assert(p_offset < SEGMENT_SIZE);  // for now we get less and less available space in each successive segment
101   }
102 }
103 
104 uint32_t size_of(const segment& s) {
105   uint32_t sum = 0;
106   for (int i = 0;  i < SIZE(s.lines);  ++i)
107     sum += num_bytes(s.lines.at(i));
108   return sum;
109 }
110 
// Number of bytes the words of 'inst' add up to.
// A word for which has_operand_metadata() reports "disp32" or "imm32" counts
// as 4 bytes; any other word counts as 1 byte, unless a case spliced in at
// the marker below handles it first.
// Assumes all bitfields are packed.
uint32_t num_bytes(const line& inst) {
  uint32_t sum = 0;
  for (int i = 0;  i < SIZE(inst.words);  ++i) {
    const word& curr = inst.words.at(i);
    if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32"))  // only multi-byte operands
      sum += 4;
    // Extension point: the marker on the next line must stay between the 'if'
    // above and the 'else' below. Presumably other layers splice additional
    // 'else if' cases in front of it (e.g. so that labels count as 0 bytes).
    // End num_bytes(curr) Special-cases
    else
      sum++;
  }
  return sum;
}
124 
125 //: Dependencies:
126 //: - We'd like to compute segment addresses before setting up global variables,
127 //:   because computing addresses for global variables requires knowing where
128 //:   the data segment starts.
129 //: - We'd like to finish expanding labels before computing segment addresses,
130 //:   because it would make computing the sizes of segments more self-contained
131 //:   (num_bytes).
132 //:
133 //: Decision: compute segment addresses before expanding labels, by being
134 //: aware in this layer of certain operand types that will eventually occupy
135 //: multiple bytes.
136 //:
137 //: The layer to expand labels later hooks into num_bytes() to teach this
138 //: layer that labels occupy zero space in the binary.