about summary refs log tree commit diff stats
path: root/subx/053new_segment.subx
diff options
context:
space:
mode:
authorKartik Agaram <vc@akkartik.com>2018-11-30 10:54:42 -0800
committerKartik Agaram <vc@akkartik.com>2018-11-30 10:54:42 -0800
commit6030d7e2e56d445ca67c6a0e8c9cf33e46bc673c (patch)
tree83b6fc800dc5c90295b58e5b0494b083636efbaa /subx/053new_segment.subx
parente9661581f092f3e210b7bd900af058d8b8c4369e (diff)
downloadmu-6030d7e2e56d445ca67c6a0e8c9cf33e46bc673c.tar.gz
4801
Reindent all SubX code to make some room for the new comment style.
Diffstat (limited to 'subx/053new_segment.subx')
-rw-r--r--subx/053new_segment.subx86
1 files changed, 43 insertions, 43 deletions
diff --git a/subx/053new_segment.subx b/subx/053new_segment.subx
index c9a710e3..69115575 100644
--- a/subx/053new_segment.subx
+++ b/subx/053new_segment.subx
@@ -1,62 +1,62 @@
 # Create a new segment (for data) using mmap().
 
 == code
-# instruction                     effective address                                                   operand     displacement    immediate
-# op          subop               mod             rm32          base        index         scale       r32
-# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+#   instruction                     effective address                                                   operand     displacement    immediate
+#   op          subop               mod             rm32          base        index         scale       r32
+#   1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
 
 # main:  (manual test if this is the last file loaded)
-  # EAX = new-segment(0x1000)
+    # EAX = new-segment(0x1000)
     # push args
-  68/push  0x1000/imm32
+    68/push  0x1000/imm32
     # call
-  e8/call  new-segment/disp32
+    e8/call  new-segment/disp32
     # discard args
-  81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32                 # add to ESP
+    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32                 # add to ESP
 
-  # store to *EAX
-  c7/copy                         0/mod/direct    0/rm32/EAX    .           .             .           .           .               0x34/imm32              # copy to *EAX
+    # store to *EAX
+    c7/copy                         0/mod/direct    0/rm32/EAX    .           .             .           .           .               0x34/imm32              # copy to *EAX
 
-  # exit(EAX)
-  89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                       # copy EAX to EBX
-  b8/copy-to-EAX  1/imm32/exit
-  cd/syscall  0x80/imm8
+    # exit(EAX)
+    89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           0/r32/EAX   .               .                       # copy EAX to EBX
+    b8/copy-to-EAX  1/imm32/exit
+    cd/syscall  0x80/imm8
 
 new-segment:  # len : int -> address
-  # prolog
-  55/push-EBP
-  89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                       # copy ESP to EBP
-  53/push-EBX
-  # copy len to mmap-new-segment.len
-  # TODO: compute mmap-new-segment+4 before runtime
-  8b/copy                         1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           0/r32/EAX   8/disp8         .                       # copy *(EBP+8) to EAX
-  bb/copy-to-EBX  mmap-new-segment/imm32
-  89/copy                         1/mod/*+disp8   3/rm32/EBX    .           .             .           0/r32/EAX   4/disp8         .                       # copy EAX to *(EBX+4)
-  # mmap(mmap-new-segment)
-  bb/copy-to-EBX  mmap-new-segment/imm32
-  b8/copy-to-EAX  0x5a/imm32/mmap
-  cd/syscall  0x80/imm8
-  # epilog
-  5b/pop-to-EBX
-  89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                       # copy EBP to ESP
-  5d/pop-to-EBP
-  c3/return
+    # prolog
+    55/push-EBP
+    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                       # copy ESP to EBP
+    53/push-EBX
+    # copy len to mmap-new-segment.len
+    # TODO: compute mmap-new-segment+4 before runtime
+    8b/copy                         1/mod/*+disp8   4/rm32/sib    5/base/EBP  4/index/none  .           0/r32/EAX   8/disp8         .                       # copy *(EBP+8) to EAX
+    bb/copy-to-EBX  mmap-new-segment/imm32
+    89/copy                         1/mod/*+disp8   3/rm32/EBX    .           .             .           0/r32/EAX   4/disp8         .                       # copy EAX to *(EBX+4)
+    # mmap(mmap-new-segment)
+    bb/copy-to-EBX  mmap-new-segment/imm32
+    b8/copy-to-EAX  0x5a/imm32/mmap
+    cd/syscall  0x80/imm8
+    # epilog
+    5b/pop-to-EBX
+    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                       # copy EBP to ESP
+    5d/pop-to-EBP
+    c3/return
 
 == data
 
 # various constants used here were found in the Linux sources (search for file mman-common.h)
 mmap-new-segment:  # type mmap_arg_struct
-  # addr
-  00 00 00 00  # null
-  # len
-  00 00 00 00  # 0x1000
-  # protection flags
-  03 00 00 00  # PROT_READ | PROT_WRITE
-  # sharing flags
-  22 00 00 00  # MAP_PRIVATE | MAP_ANONYMOUS
-  # fd
-  ff ff ff ff  # -1 since MAP_ANONYMOUS is specified
-  # offset
-  00 00 00 00  # 0 since MAP_ANONYMOUS is specified
+    # addr
+    00 00 00 00  # null
+    # len
+    00 00 00 00  # 0x1000
+    # protection flags
+    03 00 00 00  # PROT_READ | PROT_WRITE
+    # sharing flags
+    22 00 00 00  # MAP_PRIVATE | MAP_ANONYMOUS
+    # fd
+    ff ff ff ff  # -1 since MAP_ANONYMOUS is specified
+    # offset
+    00 00 00 00  # 0 since MAP_ANONYMOUS is specified
 
 # vim:nowrap:textwidth=0
n> ^
f02ca8df ^




6b64f800 ^
53e6c50a ^
880bfb78 ^
6b64f800 ^
880bfb78 ^
4718a77c ^
880bfb78 ^
6b64f800 ^







071afeff ^

6b64f800 ^






071afeff ^






















































6b64f800 ^

03e72be6 ^



6b64f800 ^







12adb3da ^


071afeff ^
6b64f800 ^

071afeff ^
12adb3da ^
071afeff ^
12adb3da ^
071afeff ^
12adb3da ^
071afeff ^
d1bbbc9a ^
4718a77c ^

071afeff ^




03e72be6 ^









f02ca8df ^









1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
                                                                          
                                                                             
 












                                                                              








                                    
 


                      


                                                                                                                                                         
     
                                                                                                                                                                          
                                        
 
                                  
                                    

       

                                                         

                                   
                                 
                                                                                                                                  






                                               
                                                                                       



















                                                                                                  
                                            
                                                                 



                                                                                                            




                                                                                                      
                                                
                                                                                                           
                  
                                                                                                          
                                          
                                                                                                                 
                                                                                    







                                                

                                                                                               






                                 






















































                                                                                                                                              

 



                                      







                                                                             


                                                                                                                                                         
           

      
                                     
                                                                                                                                                                          
           
                                                                                                                                       
           
                                                                                                                                       
            
      

                                         




                                                









                                                                                                                                                                          









                                                                                                                                                                          
//: Labels are defined by ending names with a ':'. This layer will compute
//: addresses for labels, and compute the offset for instructions using them.

//: We're introducing non-number names for the first time, so it's worth
//: laying down some ground rules all transforms will follow, so things don't
//: get too confusing:
//:   - if it starts with a digit, it's treated as a number. If it can't be
//:     parsed as hex it will raise an error.
//:   - if it starts with '-' it's treated as a number.
//:   - if it starts with '0x' it's treated as a number.
//:   - if it's two characters long, it can't be a name. Either it's a hex
//:     byte, or it raises an error.
//: That's it. Names can start with any non-digit that isn't a dash. They can
//: be a single character long. 'a' is not a hex number, it's a variable.
//: Later layers may add more conventions partitioning the space of names. But
//: the above rules will remain inviolate.
:(code)
// Classify a word as a number (true) or a name (false) using only its first
// character and its length:
//   - a leading '-' or a leading digit (which also covers '0x...') => number
//   - exactly two characters long => number (a raw hex byte)
//   - anything else => name
// An empty word is not a number.
// This only classifies; it doesn't check that the word parses as hex.
bool is_number(const string& s) {
  if (s.empty()) return false;  // s.at(0) would throw on an empty word
  const char first = s.at(0);
  if (first == '-') return true;
  // isdigit() is undefined for negative values other than EOF, so route the
  // (possibly signed) char through unsigned char first
  if (isdigit(static_cast<unsigned char>(first))) return true;
  return s.size() == 2;
}
// Exercise each of the ground rules for telling numbers and names apart.
void test_is_number() {
  // names: start with a non-digit that isn't a dash, and aren't exactly two
  // characters long
  CHECK(!is_number("a"));
  CHECK(!is_number("abc"));
  // numbers: leading digit
  CHECK(is_number("1"));
  // numbers: leading '-'
  CHECK(is_number("-1"));
  // numbers: leading '0x'
  CHECK(is_number("0x12"));
  // anything exactly two characters long is treated as a hex byte
  CHECK(is_number("ab"));
}

:(scenarios transform)
:(scenario map_label)
== 0x1
          # instruction                     effective address                                                   operand     displacement    immediate
          # op          subop               mod             rm32          base        index         scale       r32
          # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
loop:
            05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
+transform: label 'loop' is at address 1

:(before "End Level-2 Transforms")
Transform.push_back(rewrite_labels);

:(code)
// Transform that resolves labels in the first segment of the program, in
// three passes: record where each label is defined, delete the label
// definitions, then rewrite words that name a label.
// Stops early if a pass leaves errors in the trace.
void rewrite_labels(program& p) {
  trace(99, "transform") << "-- rewrite labels" << end();
  // nothing to rewrite in a program without segments
  if (p.segments.empty()) return;
  // only the first segment is processed here
  segment& code = p.segments.at(0);
  // Rewrite Labels(segment code)
  map<string, int32_t> address;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
  compute_addresses_for_labels(code, address);
  if (trace_contains_errors()) return;
  drop_labels(code);
  if (trace_contains_errors()) return;
  replace_labels_with_addresses(code, address);
}

// Walk every word of 'code', keeping a running count of the bytes emitted so
// far, and record in 'address' the count at which each label is defined
// (keyed by the label's name without the trailing ':').
//
// Byte counts per word:
//   - /disp32 and /imm32 operands: 4 bytes
//   - /disp16 operands: 2 bytes
//   - any other word that isn't a label definition: 1 byte
//   - label definitions: 0 bytes
// These sizes must stay in sync with num_bytes() below.
//
// Raises an error for malformed labels. A label that is too short or looks
// like a hex number aborts the whole pass.
void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) {
  int current_byte = 0;
  for (int i = 0;  i < SIZE(code.lines);  ++i) {
    const line& inst = code.lines.at(i);
    for (int j = 0;  j < SIZE(inst.words);  ++j) {
      const word& curr = inst.words.at(j);
      // hack: if we have any operand metadata left after previous transforms,
      // deduce its size
      // Maybe we should just move this transform to before instruction
      // packing, and deduce the size of *all* operands. But then we'll also
      // have to deal with bitfields.
      if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
        if (*curr.data.rbegin() == ':')
          raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
        current_byte += 4;
      }
      // /disp16 operands occupy 2 bytes in the output (that's how many
      // replace_labels_with_addresses() emits for them), so count 2 here.
      // Words ending in ':' are deliberately left to the label-definition
      // branch below so they're reported the same way as before.
      else if (has_metadata(curr, "disp16") && *curr.data.rbegin() != ':') {
        current_byte += 2;
      }
      // automatically handle /disp8 and /imm8 here
      else if (*curr.data.rbegin() != ':') {
        ++current_byte;
      }
      else {
        string label = drop_last(curr.data);
        // ensure labels look sufficiently different from raw hex
        if (SIZE(label) <= 2) {
          raise << "label '" << label << "' is too short; must be more than two characters long\n" << end();
          return;
        }
        // ensure labels look sufficiently different from hex literals
        if (label.substr(0, 2) == "0x") {
          raise << "label '" << label << "' looks like a hex number; use a different name\n" << end();
          return;
        }
        if (contains_any_operand_metadata(curr))
          raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
        if (j > 0)
          raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
        put(address, label, current_byte);
        trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
        // no modifying current_byte; label definitions won't be in the final binary
      }
    }
  }
}

// Delete every label definition (word ending in ':') from every line of
// 'code'. Lines that consisted only of labels are left behind empty.
void drop_labels(segment& code) {
  for (int i = 0;  i < SIZE(code.lines);  ++i) {
    line& inst = code.lines.at(i);
    // erase-remove idiom: compact the non-label words to the front, then
    // chop off the tail
    vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
    inst.words.erase(new_end, inst.words.end());
  }
}

// A word is a label definition if its last character is ':'.
// Assumes w.data is non-empty.
bool is_label(const word& w) {
  return *w.data.rbegin() == ':';
}

// Replace every word whose data names a label in 'address' with the bytes of
// the signed distance from the end of the current instruction to the label.
// The number of bytes emitted depends on the word's metadata:
//   - /disp8, /imm8: 1 byte
//   - /disp16: 2 bytes
//   - /disp32, /imm32: 4 bytes
// Raises an error if the distance doesn't fit in a signed 8- or 16-bit
// operand. All other words are copied through unchanged.
// Expects label definitions to have been dropped already.
void replace_labels_with_addresses(segment& code, const map<string, int32_t>& address) {
  int32_t byte_next_instruction_starts_at = 0;
  for (int i = 0;  i < SIZE(code.lines);  ++i) {
    line& inst = code.lines.at(i);
    // offsets are relative to the end of the current instruction, so advance
    // past it before looking at its words
    byte_next_instruction_starts_at += num_bytes(inst);
    line new_inst;
    for (int j = 0;  j < SIZE(inst.words);  ++j) {
      const word& curr = inst.words.at(j);
      if (contains_key(address, curr.data)) {
        int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at;
        if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
          // the offset is emitted as a single signed byte, so anything above
          // 0x7f would come out as a negative distance
          if (offset > 0x7f || offset < -0x7f)
            raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end();
          else
            emit_hex_bytes(new_inst, offset, 1);
        }
        else if (has_metadata(curr, "disp16")) {
          // likewise for a signed 16-bit offset
          if (offset > 0x7fff || offset < -0x7fff)
            raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end();
          else
            emit_hex_bytes(new_inst, offset, 2);
        }
        else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
          emit_hex_bytes(new_inst, offset, 4);
        }
        // NOTE(review): a label reference carrying none of the metadata above
        // is dropped from the instruction without any error -- confirm an
        // earlier transform guarantees this can't happen.
      }
      else {
        new_inst.words.push_back(curr);
      }
    }
    inst.words.swap(new_inst.words);
    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
  }
}

// Number of bytes 'inst' occupies in the final binary.
// Must agree with the sizes used in compute_addresses_for_labels().
// Assumes all bitfields are packed.
uint32_t num_bytes(const line& inst) {
  uint32_t sum = 0;
  for (int i = 0;  i < SIZE(inst.words);  ++i) {
    const word& curr = inst.words.at(i);
    // only multi-byte operands need special handling
    if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32"))
      sum += 4;
    else if (has_metadata(curr, "disp16"))
      sum += 2;
    else
      sum++;
  }
  return sum;
}

// Render just the data of each word in 'inst' (no metadata), separated by
// single spaces.
string data_to_string(const line& inst) {
  ostringstream result;
  const int num_words = SIZE(inst.words);
  int idx = 0;
  while (idx < num_words) {
    // separator goes before every word except the first
    if (idx != 0) result << ' ';
    const word& w = inst.words.at(idx);
    result << w.data;
    ++idx;
  }
  return result.str();
}

// Return a copy of 's' without its final character.
// An empty string is returned unchanged.
string drop_last(const string& s) {
  if (s.empty()) return s;
  // avoid --s.end(): decrementing an rvalue iterator isn't guaranteed to
  // compile (e.g. when the iterator is a raw pointer)
  return s.substr(0, s.size()-1);
}

//: Label definitions must be the first word on a line. No jumping inside
//: instructions.
//: They should also be the only word on a line.
//: However, you can absolutely have multiple labels map to the same address,
//: as long as they're on separate lines.

:(scenario multiple_labels_at)
== 0x1
          # instruction                     effective address                                                   operand     displacement    immediate
          # op          subop               mod             rm32          base        index         scale       r32
          # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
# address 1
loop:
loop2:
# address 1 (labels take up no space)
            05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
# address 6
            eb                                                                                                              loop2/disp8
# address 8
            eb                                                                                                              loop3/disp8
# address 10
loop3:
+transform: label 'loop' is at address 1
+transform: label 'loop2' is at address 1
+transform: label 'loop3' is at address 10
# first jump is to -7
+transform: instruction after transform: 'eb f9'
# second jump is to 0 (fall through)
+transform: instruction after transform: 'eb 00'

:(scenario label_too_short)
% Hide_errors = true;
== 0x1
          # instruction                     effective address                                                   operand     displacement    immediate
          # op          subop               mod             rm32          base        index         scale       r32
          # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
xz:
            05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
+error: label 'xz' is too short; must be more than two characters long

:(scenario label_hex)
% Hide_errors = true;
== 0x1
          # instruction                     effective address                                                   operand     displacement    immediate
          # op          subop               mod             rm32          base        index         scale       r32
          # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
0xab:
            05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
+error: label '0xab' looks like a hex number; use a different name