about summary refs log tree commit diff stats
path: root/subx/apps/pack.subx
blob: d83a539877b96b89ca32d83a688df779efd2e9d0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
# Read a text file of SubX instructions from stdin, and convert it into a list
# of whitespace-separated ascii hex bytes on stdout, suitable to be further
# processed by apps/hex.
#
# To run (from the subx/ directory):
#   $ ./subx translate *.subx apps/pack.subx -o apps/pack
#   $ echo '05/add-to-EAX 0x20/imm32'  |./subx run apps/pack
# Expected output:
#   05 20 00 00 00  # 05/add-to-EAX 0x20/imm32
# The original instruction gets included as a comment at the end of each
# converted line.
#
# There's zero error-checking. For now we assume the input program is valid.
# We'll continue to rely on the C++ version for error messages.
#
# Label definitions and uses are left untouched for a future 'pass'.

== code
#   instruction                     effective address                                                   register    displacement    immediate
# . op          subop               mod             rm32          base        index         scale       r32
# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes

# main: run tests if necessary, convert stdin if not
    # . prolog
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # - if argc > 1 and argv[1] == "test" then return run_tests()
    # . argc > 1
    81          7/subop/compare     1/mod/*+disp8   5/rm32/EBP    .           .             .           .           0/disp8         1/imm32           # compare *EBP
    7e/jump-if-lesser-or-equal  $run-main/disp8
    # . argv[1] == "test"
    # . . push args
    68/push  "test"/imm32
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  kernel-string-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # . check result
    3d/compare-EAX  1/imm32
    75/jump-if-not-equal  $run-main/disp8
    # . run-tests()
    e8/call  run-tests/disp32
    8b/copy                         0/mod/indirect  5/rm32/.disp32            .             .           3/r32/EBX   Num-test-failures/disp32          # copy *Num-test-failures to EBX
    eb/jump  $main:end/disp8
$run-main:
    # - otherwise convert stdin
    # var ed/EAX : exit-descriptor
    81          5/subop/subtract    3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # subtract from ESP
    89/copy                         3/mod/direct    0/rm32/EAX    .           .             .           4/r32/ESP   .               .                 # copy ESP to EAX
    # configure ed to really exit()
    # . ed->target = 0
    c7          0/subop/copy        0/mod/direct    0/rm32/EAX    .           .             .           .           .               0/imm32           # copy to *EAX
    # return convert(Stdin, 1/stdout, 2/stderr, ed)
    # . . push args
    50/push-EAX/ed
    68/push  Stderr/imm32
    68/push  Stdout/imm32
    68/push  Stdin/imm32
    # . . call
    e8/call  convert/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0x10/imm32        # add to ESP
    # . syscall(exit, 0)
    bb/copy-to-EBX  0/imm32
$main:end:
    b8/copy-to-EAX  1/imm32/exit
    cd/syscall  0x80/imm8

# - big picture
# We'll operate on each line/instruction in isolation. That way we only need to
# allocate memory for converting a single instruction.
#
# To pack an entire file:
#   skip segment headers
#   pack every instruction in the code segment
#   skip other segments

# - To pack an instruction, following the C++ version:
# read line
# parse words
# read first word as opcode and emit
# if 0f or f2 or f3 read second opcode and emit
# if 'f2 0f' or 'f3 0f' read third opcode and emit
# scan words
#   if has metadata 'mod', parse into mod
#   if has metadata 'rm32', parse into rm32
#   if has metadata 'r32', parse into r32
#   if has metadata 'subop', parse into r32
# if at least one of the 3 was present, emit modrm byte
# scan words
#   if has metadata 'base', parse into base
#   if has metadata 'index', parse into index
#   if has metadata 'scale', parse into scale
# if at least one of the 3 was present, emit sib byte
# parse errors => <abort>
# scan words
#   if has metadata 'disp8', emit-maybe
#   if has metadata 'disp16', emit-maybe as 2 bytes
#   if has metadata 'disp32', emit-maybe as 4 bytes
# scan words
#   if has metadata 'imm8', emit-maybe
#   if has metadata 'imm32', emit-maybe as 4 bytes
# finally, emit line prefixed with a '  # '

# simplifications since we perform zero error handling (continuing to rely on the C++ version for that):
#   missing fields are always 0-filled
#   bytes never mentioned are silently dropped; if you don't provide /mod, /rm32 or /r32 you don't get a 0 modrm byte. You get *no* modrm byte.
#   in case of conflict, last operand with a name is recognized
#   silently drop extraneous operands
#   unceremoniously abort on non-numeric operands except disp or imm

# primary state: line
#   stream of 512 bytes; abort if it ever overflows
#
# conceptual hierarchy within a line:
#   line = words separated by ' ', maybe followed by comment starting with '#'
#   word = name until '/', then 0 or more metadata separated by '/'
#
# we won't bother saving the internal structure of lines; reparsing should be cheap using two primitives:
#   next-token(stream, delim char) -> slice (start, end pointers)
#   slice-equal?(slice, kernel string)

# helpers:
#   is-hex-int(in : &slice)
#   parse-hex-int(in : &slice) -> int
#   emit-maybe(out : &buffered-file, n : int, width : int)
#   emit-hex-int(out : &buffered-file, n : int)
#   emit(out : &buffered-file, word : &slice)
#   has-metadata?(word : &slice, s : &kernel-string) -> bool

convert:  # in : (address buffered-file), out : (address buffered-file), err : (address buffered-file), ed : (address exit-descriptor) -> <void>
    # pseudocode:
    #   line = new-stream(512, 1)
    #   repeatedly
    #     clear-stream(line)
    #     EAX = read-line(in, line, err, ed)
    #     if EAX == EOF break
    #     convert-instruction(line, out, err, ed)
    #   flush(out)
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
    # . restore registers
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# (re)compute the bounds of the next word in the line
next-word:  # line : (address stream byte), out : (address slice)
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
    50/push-EAX
    51/push-ECX
    56/push-ESI
    57/push-EDI
    # ESI = line
    8b/copy                         1/mod/*+disp8   5/rm32/EBP    .           .             .           6/r32/ESI   8/disp8         .                 # copy *(EBP+8) to ESI
    # EDI = out
    8b/copy                         1/mod/*+disp8   5/rm32/EBP    .           .             .           7/r32/EDI   0xc/disp8       .                 # copy *(EBP+12) to EDI
    # skip-chars-matching(line, ' ')
    # . . push args
    68/push  0x20/imm32/space
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  skip-chars-matching/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # out->start = &line->data[line->read]
    8b/copy                         1/mod/*+disp8   6/rm32/ESI    .           .             .           1/r32/ECX   4/disp8         .                 # copy *(ESI+4) to ECX
    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    6/base/ESI  1/index/ECX   .           0/r32/EAX   0xc/disp8       .                 # copy ESI+ECX+12 to EAX
    89/copy                         0/mod/indirect  7/rm32/EDI    .           .             .           0/r32/EAX   .               .                 # copy EAX to *EDI
    # if line->data[line->read] == '#': out->end = &line->data[line->write]), skip rest of stream and return
    # . EAX = line->data[line->read]
    31/xor                          3/mod/direct    0/rm32/EAX    .           .             .           0/r32/EAX   .               .                 # clear EAX
    8a/copy-byte                    1/mod/*+disp8   4/rm32/sib    6/base/ESI  1/index/ECX   .           0/r32/AL    0xc/disp8       .                 # copy byte at *(ESI+ECX+12) to AL
    # . compare
    3d/compare-EAX-with  0x23/imm32/pound
    75/jump-if-not-equal  $next-word:not-comment/disp8
    # . out->end = &line->data[line->write]
    8b/copy                         0/mod/indirect  6/rm32/ESI    .           .             .           0/r32/EAX   .               .                 # copy *ESI to EAX
    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    6/base/ESI  0/index/EAX   .           0/r32/EAX   0xc/disp8       .                 # copy ESI+EAX+12 to EAX
    89/copy                         1/mod/*+disp8   7/rm32/EDI    .           .             .           0/r32/EAX   4/disp8         .                 # copy EAX to *(EDI+4)
    # . line->read = line->write
    89/copy                         1/mod/*+disp8   6/rm32/ESI    .           .             .           0/r32/EAX   4/disp8         .                 # copy EAX to *(ESI+4)
    # . return
    eb/jump  $next-word:end/disp8
$next-word:not-comment:
    # otherwise skip-chars-not-matching(line, ' ')
    # . . push args
    68/push  0x20/imm32/space
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  skip-chars-not-matching/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # out->end = &line->data[line->read]
    8b/copy                         1/mod/*+disp8   6/rm32/ESI    .           .             .           1/r32/ECX   4/disp8         .                 # copy *(ESI+4) to ECX
    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    6/base/ESI  1/index/ECX   .           0/r32/EAX   0xc/disp8       .                 # copy ESI+ECX+12 to EAX
    89/copy                         1/mod/*+disp8   7/rm32/EDI    .           .             .           0/r32/EAX   4/disp8         .                 # copy EAX to *(EDI+4)
$next-word:end:
    # . restore registers
    5f/pop-to-EDI
    5e/pop-to-ESI
    59/pop-to-ECX
    58/pop-to-EAX
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

test-next-word:
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # setup
    # . clear-stream(_test-stream)
    # . . push args
    68/push  _test-stream/imm32
    # . . call
    e8/call  clear-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # var slice/ECX = {0, 0}
    68/push  0/imm32/end
    68/push  0/imm32/start
    89/copy                         3/mod/direct    1/rm32/ECX    .           .             .           4/r32/ESP   .               .                 # copy ESP to ECX
    # write(_test-stream, "  ab")
    # . . push args
    68/push  "  ab"/imm32
    68/push  _test-stream/imm32
    # . . call
    e8/call  write/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # next-word(_test-stream, slice)
    # . . push args
    51/push-ECX
    68/push  _test-stream/imm32
    # . . call
    e8/call  next-word/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # check-ints-equal(slice->start - _test-stream->data, 2, msg)
    # . check-ints-equal(slice->start - _test-stream, 14, msg)
    # . . push args
    68/push  "F - test-next-word: start"/imm32
    68/push  0xe/imm32
    # . . push slice->start - _test-stream
    8b/copy                         0/mod/indirect  1/rm32/ECX    .           .             .           0/r32/EAX   .               .                 # copy *ECX to EAX
    81          5/subop/subtract    3/mod/direct    0/rm32/EAX    .           .             .           .           .               _test-stream/imm32 # subtract from EAX
    50/push-EAX
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # check-ints-equal(slice->end - _test-stream->data, 4, msg)
    # . check-ints-equal(slice->end - _test-stream, 16, msg)
    # . . push args
    68/push  "F - test-next-word: end"/imm32
    68/push  0x10/imm32
    # . . push slice->end - _test-stream
    8b/copy                         1/mod/*+disp8   1/rm32/ECX    .           .             .           0/r32/EAX   4/disp8         .                 # copy *(ECX+4) to EAX
    81          5/subop/subtract    3/mod/direct    0/rm32/EAX    .           .             .           .           .               _test-stream/imm32 # subtract from EAX
    50/push-EAX
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

test-next-word-returns-whole-comment:
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # setup
    # . clear-stream(_test-stream)
    # . . push args
    68/push  _test-stream/imm32
    # . . call
    e8/call  clear-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # var slice/ECX = {0, 0}
    68/push  0/imm32/end
    68/push  0/imm32/start
    89/copy                         3/mod/direct    1/rm32/ECX    .           .             .           4/r32/ESP   .               .                 # copy ESP to ECX
    # write(_test-stream, "  # a")
    # . . push args
    68/push  "  # a"/imm32
    68/push  _test-stream/imm32
    # . . call
    e8/call  write/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # next-word(_test-stream, slice)
    # . . push args
    51/push-ECX
    68/push  _test-stream/imm32
    # . . call
    e8/call  next-word/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # check-ints-equal(slice->start - _test-stream->data, 2, msg)
    # . check-ints-equal(slice->start - _test-stream, 14, msg)
    # . . push args
    68/push  "F - test-next-word-returns-whole-comment: start"/imm32
    68/push  0xe/imm32
    # . . push slice->start - _test-stream
    8b/copy                         0/mod/indirect  1/rm32/ECX    .           .             .           0/r32/EAX   .               .                 # copy *ECX to EAX
    81          5/subop/subtract    3/mod/direct    0/rm32/EAX    .           .             .           .           .               _test-stream/imm32 # subtract from EAX
    50/push-EAX
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # check-ints-equal(slice->end - _test-stream->data, 5, msg)
    # . check-ints-equal(slice->end - _test-stream, 17, msg)
    # . . push args
    68/push  "F - test-next-word-returns-whole-comment: end"/imm32
    68/push  0x11/imm32
    # . . push slice->end - _test-stream
    8b/copy                         1/mod/*+disp8   1/rm32/ECX    .           .             .           0/r32/EAX   4/disp8         .                 # copy *(ECX+4) to EAX
    81          5/subop/subtract    3/mod/direct    0/rm32/EAX    .           .             .           .           .               _test-stream/imm32 # subtract from EAX
    50/push-EAX
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

== data

# . . vim:nowrap:textwidth=0
#n183'>183
184
185
186
187
188
189
190
191
192
193
194
195
196