1 //: Labels are defined by ending names with a ':'. This layer will compute
  2 //: addresses for labels, and compute the offset for instructions using them.
  3 
  4 :(scenarios transform)
  5 :(scenario map_label)
  6 == 0x1
  7           # instruction                     effective address                                                   operand     displacement    immediate
  8           # op          subop               mod             rm32          base        index         scale       r32
  9           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
 10 loop:
 11             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
 12 +transform: label 'loop' is at address 1
 13 
 14 :(before "End Transforms")
 15 Transform.push_back(rewrite_labels);
 16 
 17 :(code)
 18 void rewrite_labels(program& p) {
 19   trace(99, "transform") << "-- rewrite labels" << end();
 20   if (p.segments.empty()) return;
 21   segment& code = p.segments.at(0);
 22   map<string, int32_t> address;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
 23   compute_addresses_for_labels(code, address);
 24   if (trace_contains_errors()) return;
 25   drop_labels(code);
 26   if (trace_contains_errors()) return;
 27   replace_labels_with_addresses(code, address);
 28 }
 29 
 30 void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) {
 31   int current_byte = 0;
 32   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 33     const line& inst = code.lines.at(i);
 34     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 35       const word& curr = inst.words.at(j);
 36       // hack: if we have any operand metadata left after previous transforms,
 37       // deduce its size
 38       // Maybe we should just move this transform to before instruction
 39       // packing, and deduce the size of *all* operands. But then we'll also
 40       // have to deal with bitfields.
 41       if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
 42         if (*curr.data.rbegin() == ':')
 43           raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
 44         current_byte += 4;
 45       }
 46       // automatically handle /disp8 and /imm8 here
 47       else if (*curr.data.rbegin() != ':') {
 48         ++current_byte;
 49       }
 50       else {
 51         if (contains_any_operand_metadata(curr))
 52           raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
 53         if (j > 0)
 54           raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
 55         string label = curr.data.substr(0, SIZE(curr.data)-1);
 56         put(address, label, current_byte);
 57         trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
 58         // no modifying current_byte; label definitions won't be in the final binary
 59       }
 60     }
 61   }
 62 }
 63 
 64 void drop_labels(segment& code) {
 65   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 66     line& inst = code.lines.at(i);
 67     vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
 68     inst.words.erase(new_end, inst.words.end());
 69   }
 70 }
 71 
 72 bool is_label(const word& w) {
 73   return *w.data.rbegin() == ':';
 74 }
 75 
 76 void replace_labels_with_addresses(segment& code, const map<string, int32_t>& address) {
 77   int32_t byte_next_instruction_starts_at = 0;
 78   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 79     line& inst = code.lines.at(i);
 80     byte_next_instruction_starts_at += num_bytes(inst);
 81     line new_inst;
 82     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 83       const word& curr = inst.words.at(j);
 84       if (contains_key(address, curr.data)) {
 85         int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at;
 86         if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
 87           if (offset > 0xff || offset < -0x7f)
 88             raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end();
 89           else
 90             emit_hex_bytes(new_inst, offset, 1);
 91         }
 92         else if (has_metadata(curr, "disp16")) {
 93           if (offset > 0xffff || offset < -0x7fff)
 94             raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end();
 95           else
 96             emit_hex_bytes(new_inst, offset, 2);
 97         }
 98         else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
 99           emit_hex_bytes(new_inst, offset, 4);
100         }
101       }
102       else {
103         new_inst.words.push_back(curr);
104       }
105     }
106     inst.words.swap(new_inst.words);
107     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
108   }
109 }
110 
111 // Assumes all bitfields are packed.
112 uint32_t num_bytes(const line& inst) {
113   uint32_t sum = 0;
114   for (int i = 0;  i < SIZE(inst.words);  ++i) {
115     const word& curr = inst.words.at(i);
116     if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32"))  // only multi-byte operands
117       sum += 4;
118     else
119       sum++;
120   }
121   return sum;
122 }
123 
124 string data_to_string(const line& inst) {
125   ostringstream out;
126   for (int i = 0;  i < SIZE(inst.words);  ++i) {
127     if (i > 0) out << ' ';
128     out << inst.words.at(i).data;
129   }
130   return out.str();
131 }
132 
133 //: Label definitions must be the first word on a line. No jumping inside
134 //: instructions.
135 //: They should also be the only word on a line.
136 //: However, you can absolutely have multiple labels map to the same address,
137 //: as long as they're on separate lines.
138 
139 :(scenario multiple_labels_at)
140 == 0x1
141           # instruction                     effective address                                                   operand     displacement    immediate
142           # op          subop               mod             rm32          base        index         scale       r32
143           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
144 # address 1
145 loop:
146 loop2:
147 # address 1 (labels take up no space)
148             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
149 # address 6
150             eb                                                                                                              loop2/disp8
151 # address 8
152             eb                                                                                                              loop3/disp8
153 # address 10
154 loop3:
155 +transform: label 'loop' is at address 1
156 +transform: label 'loop2' is at address 1
157 +transform: label 'loop3' is at address 10
158 # first jump is to -7
159 +transform: instruction after transform: 'eb f9'
160 # second jump is to 0 (fall through)
161 +transform: instruction after transform: 'eb 00'