From 37d53a70958bfe5b1d7946229af9c12f0b865abc Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sun, 23 Sep 2018 22:38:16 -0700 Subject: 4512 --- html/subx/035labels.cc.html | 278 +++++++++++++++++++++++--------------------- 1 file changed, 144 insertions(+), 134 deletions(-) (limited to 'html/subx/035labels.cc.html') diff --git a/html/subx/035labels.cc.html b/html/subx/035labels.cc.html index 182f1d27..de507cf6 100644 --- a/html/subx/035labels.cc.html +++ b/html/subx/035labels.cc.html @@ -105,10 +105,10 @@ if ('onhashchange' in window) { 43 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes 44 loop: 45 05 0x0d0c0b0a/imm32 # add to EAX - 46 +transform: label 'loop' is at address 1 + 46 +transform: label 'loop' is at address 1 47 48 :(before "End Level-2 Transforms") - 49 Transform.push_back(rewrite_labels); + 49 Transform.push_back(rewrite_labels); 50 :(code) 51 void rewrite_labels(program& p) { 52 trace(99, "transform") << "-- rewrite labels" << end(); @@ -117,9 +117,9 @@ if ('onhashchange' in window) { 55 map<string, int32_t> byte_index; // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits 56 compute_byte_indices_for_labels(code, byte_index); 57 if (trace_contains_errors()) return; - 58 drop_labels(code); + 58 drop_labels(code); 59 if (trace_contains_errors()) return; - 60 replace_labels_with_displacements(code, byte_index); + 60 replace_labels_with_displacements(code, byte_index); 61 } 62 63 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) { @@ -143,158 +143,168 @@ if ('onhashchange' in window) { 81 ++current_byte; 82 } 83 else { - 84 string label = drop_last(curr.data); + 84 string label = drop_last(curr.data); 85 // ensure labels look sufficiently different from raw hex - 86 check_valid_name(label); + 86 check_valid_name(label); 87 if (trace_contains_errors()) return; 88 if (contains_any_operand_metadata(curr)) - 89 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end(); + 89 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end(); 90 if (j > 0) 91 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end(); - 92 put(byte_index, label, current_byte); - 93 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end(); - 94 // no modifying current_byte; label definitions won't be in the final binary - 95 } - 96 } - 97 } - 98 } - 99 -100 void drop_labels(segment& code) { -101 for (int i = 0; i < SIZE(code.lines); ++i) { -102 line& inst = code.lines.at(i); -103 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label); -104 inst.words.erase(new_end, inst.words.end()); -105 } -106 } -107 -108 bool is_label(const word& w) { -109 return *w.data.rbegin() == ':'; -110 } -111 -112 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) { -113 int32_t byte_index_next_instruction_starts_at = 0; -114 for (int i = 0; i < SIZE(code.lines); ++i) { -115 line& inst = code.lines.at(i); -116 byte_index_next_instruction_starts_at += num_bytes(inst); -117 line new_inst; -118 for (int j = 0; j < SIZE(inst.words); ++j) { -119 const word& curr = inst.words.at(j); -120 if (contains_key(byte_index, curr.data)) { -121 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at; -122 if (has_operand_metadata(curr, "disp8")) { -123 if (displacement > 0xff || displacement < -0x7f) -124 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end(); -125 else -126 emit_hex_bytes(new_inst, displacement, 1); -127 } -128 else if (has_operand_metadata(curr, "disp16")) { -129 if (displacement > 0xffff || displacement < -0x7fff) -130 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end(); -131 else -132 emit_hex_bytes(new_inst, displacement, 2); -133 } -134 else if (has_operand_metadata(curr, "disp32")) { -135 emit_hex_bytes(new_inst, displacement, 4); -136 } -137 } -138 else { -139 new_inst.words.push_back(curr); -140 } -141 } -142 inst.words.swap(new_inst.words); -143 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); -144 } -145 } -146 -147 string data_to_string(const line& inst) { -148 ostringstream out; -149 for (int i = 0; i < SIZE(inst.words); ++i) { -150 if (i > 0) out << ' '; -151 out << inst.words.at(i).data; -152 } -153 return out.str(); -154 } -155 -156 string drop_last(const string& s) { -157 return string(s.begin(), --s.end()); -158 } -159 -160 //: Label definitions must be the first word on a line. No jumping inside -161 //: instructions. -162 //: They should also be the only word on a line. -163 //: However, you can absolutely have multiple labels map to the same address, -164 //: as long as they're on separate lines. + 92 if (Dump_map) + 93 cerr << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n'; + 94 put(byte_index, label, current_byte); + 95 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end(); + 96 // no modifying current_byte; label definitions won't be in the final binary + 97 } + 98 } + 99 } +100 } +101 +102 :(before "End Globals") +103 bool Dump_map = false; // currently used only by 'subx translate' +104 :(before "End Commandline Options") +105 else if (is_equal(*arg, "--map")) { +106 Dump_map = true; +107 } +108 +109 :(code) +110 void drop_labels(segment& code) { +111 for (int i = 0; i < SIZE(code.lines); ++i) { +112 line& inst = code.lines.at(i); +113 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label); +114 inst.words.erase(new_end, inst.words.end()); +115 } +116 } +117 +118 bool is_label(const word& w) { +119 return *w.data.rbegin() == ':'; +120 } +121 +122 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) { +123 int32_t byte_index_next_instruction_starts_at = 0; +124 for (int i = 0; i < SIZE(code.lines); ++i) { +125 line& inst = code.lines.at(i); +126 byte_index_next_instruction_starts_at += num_bytes(inst); +127 line new_inst; +128 for (int j = 0; j < SIZE(inst.words); ++j) { +129 const word& curr = inst.words.at(j); +130 if (contains_key(byte_index, curr.data)) { +131 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at; +132 if (has_operand_metadata(curr, "disp8")) { +133 if (displacement > 0xff || displacement < -0x7f) +134 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end(); +135 else +136 emit_hex_bytes(new_inst, displacement, 1); +137 } +138 else if (has_operand_metadata(curr, "disp16")) { +139 if (displacement > 0xffff || displacement < -0x7fff) +140 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end(); +141 else +142 emit_hex_bytes(new_inst, displacement, 2); +143 } +144 else if (has_operand_metadata(curr, "disp32")) { +145 emit_hex_bytes(new_inst, displacement, 4); +146 } +147 } +148 else { +149 new_inst.words.push_back(curr); +150 } +151 } +152 inst.words.swap(new_inst.words); +153 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); +154 } +155 } +156 +157 string data_to_string(const line& inst) { +158 ostringstream out; +159 for (int i = 0; i < SIZE(inst.words); ++i) { +160 if (i > 0) out << ' '; +161 out << inst.words.at(i).data; +162 } +163 return out.str(); +164 } 165 -166 :(scenario multiple_labels_at) -167 == 0x1 -168 # instruction effective address operand displacement immediate -169 # op subop mod rm32 base index scale r32 -170 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -171 # address 1 -172 loop: -173 $loop2: -174 # address 1 (labels take up no space) -175 05 0x0d0c0b0a/imm32 # add to EAX -176 # address 6 -177 eb $loop2/disp8 -178 # address 8 -179 eb $loop3/disp8 -180 # address 0xa -181 $loop3: -182 +transform: label 'loop' is at address 1 -183 +transform: label '$loop2' is at address 1 -184 +transform: label '$loop3' is at address a -185 # first jump is to -7 -186 +transform: instruction after transform: 'eb f9' -187 # second jump is to 0 (fall through) -188 +transform: instruction after transform: 'eb 00' -189 -190 :(scenario label_too_short) -191 % Hide_errors = true; -192 == 0x1 -193 # instruction effective address operand displacement immediate -194 # op subop mod rm32 base index scale r32 -195 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -196 xz: -197 05 0x0d0c0b0a/imm32 # add to EAX -198 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name +166 string drop_last(const string& s) { +167 return string(s.begin(), --s.end()); +168 } +169 +170 //: Label definitions must be the first word on a line. No jumping inside +171 //: instructions. +172 //: They should also be the only word on a line. +173 //: However, you can absolutely have multiple labels map to the same address, +174 //: as long as they're on separate lines. +175 +176 :(scenario multiple_labels_at) +177 == 0x1 +178 # instruction effective address operand displacement immediate +179 # op subop mod rm32 base index scale r32 +180 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +181 # address 1 +182 loop: +183 $loop2: +184 # address 1 (labels take up no space) +185 05 0x0d0c0b0a/imm32 # add to EAX +186 # address 6 +187 eb $loop2/disp8 +188 # address 8 +189 eb $loop3/disp8 +190 # address 0xa +191 $loop3: +192 +transform: label 'loop' is at address 1 +193 +transform: label '$loop2' is at address 1 +194 +transform: label '$loop3' is at address a +195 # first jump is to -7 +196 +transform: instruction after transform: 'eb f9' +197 # second jump is to 0 (fall through) +198 +transform: instruction after transform: 'eb 00' 199 -200 :(scenario label_hex) +200 :(scenario label_too_short) 201 % Hide_errors = true; 202 == 0x1 203 # instruction effective address operand displacement immediate 204 # op subop mod rm32 base index scale r32 205 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -206 0xab: +206 xz: 207 05 0x0d0c0b0a/imm32 # add to EAX -208 +error: '0xab' looks like a hex number; use a different name +208 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name 209 -210 :(scenario label_negative_hex) +210 :(scenario label_hex) 211 % Hide_errors = true; 212 == 0x1 213 # instruction effective address operand displacement immediate 214 # op subop mod rm32 base index scale r32 215 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -216 -a: # indent to avoid looking like a trace_should_not_contain command for this scenario +216 0xab: 217 05 0x0d0c0b0a/imm32 # add to EAX -218 +error: '-a' starts with '-', which can be confused with a negative number; use a different name +218 +error: '0xab' looks like a hex number; use a different name 219 -220 //: now that we have labels, we need to adjust segment size computation to -221 //: ignore them. -222 -223 :(scenario segment_size_ignores_labels) -224 % Mem_offset = CODE_START; -225 == code # 0x08048074 -226 05/add 0x0d0c0b0a/imm32 # 5 bytes -227 foo: # 0 bytes -228 == data # 0x08049079 -229 bar: -230 00 -231 +transform: segment 1 begins at address 0x08049079 +220 :(scenario label_negative_hex) +221 % Hide_errors = true; +222 == 0x1 +223 # instruction effective address operand displacement immediate +224 # op subop mod rm32 base index scale r32 +225 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +226 -a: # indent to avoid looking like a trace_should_not_contain command for this scenario +227 05 0x0d0c0b0a/imm32 # add to EAX +228 +error: '-a' starts with '-', which can be confused with a negative number; use a different name +229 +230 //: now that we have labels, we need to adjust segment size computation to +231 //: ignore them. 232 -233 :(before "End num_bytes(curr) Special-cases") -234 else if (is_label(curr)) -235 ; // don't count it +233 :(scenario segment_size_ignores_labels) +234 % Mem_offset = CODE_START; +235 == code # 0x08048074 +236 05/add 0x0d0c0b0a/imm32 # 5 bytes +237 foo: # 0 bytes +238 == data # 0x08049079 +239 bar: +240 00 +241 +transform: segment 1 begins at address 0x08049079 +242 +243 :(before "End num_bytes(curr) Special-cases") +244 else if (is_label(curr)) +245 ; // don't count it -- cgit 1.4.1-2-gfad0