# Read a text file of SubX instructions from stdin, and convert it into a list
# of whitespace-separated ascii hex bytes on stdout, suitable to be further
# processed by apps/hex.
#
# To run (from the subx/ directory):
# $ ./subx translate *.subx apps/pack.subx -o apps/pack
# $ echo '05/add-to-EAX 0x20/imm32' |./subx run apps/pack
# Expected output:
# 05 20 00 00 00 # 05/add-to-EAX 0x20/imm32
# The original instruction gets included as a comment at the end of each
# converted line.
#
# There's zero error-checking. For now we assume the input program is valid.
# We'll continue to rely on the C++ version for error messages.
#
# Label definitions and uses are left untouched for a future 'pass'.
== code
#   instruction                     effective address                                                   register    displacement    immediate
# . op          subop               mod             rm32          base        index         scale       r32
# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
# main: run tests if necessary, convert stdin if not
#
# Control flow:
#   - if argc > 1 and argv[1] == "test": call run-tests, then exit with status *Num-test-failures
#   - otherwise: call convert on Stdin/Stdout/Stderr, then exit with status 0
# Both paths meet at $main:end, which issues the exit syscall with EBX as the status.
# argc is read from *EBP and argv[1] from *(EBP+8) after ESP is copied to EBP below.
# . prolog
89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP
# - if argc > 1 and argv[1] == "test" then return run-tests()
# . argc > 1
# (*EBP is spelled as mod 1 with a zero disp8, because mod 0 with rm32 5 selects a bare disp32 instead -- the form used for Num-test-failures below)
81 7/subop/compare 1/mod/*+disp8 5/rm32/EBP . . . . 0/disp8 1/imm32 # compare *EBP
7e/jump-if-lesser-or-equal $run-main/disp8
# . argv[1] == "test"
# . . push args
68/push "test"/imm32
ff 6/subop/push 1/mod/*+disp8 5/rm32/EBP . . . . 8/disp8 . # push *(EBP+8)
# . . call
e8/call kernel-string-equal/disp32
# . . discard args
81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add to ESP
# . check result
# (anything other than 1 in EAX falls through to the normal conversion path)
3d/compare-EAX 1/imm32
75/jump-if-not-equal $run-main/disp8
# . run-tests()
e8/call run-tests/disp32
# . exit status = *Num-test-failures
8b/copy 0/mod/indirect 5/rm32/.disp32 . . 3/r32/EBX Num-test-failures/disp32 # copy *Num-test-failures to EBX
eb/jump $main:end/disp8
$run-main:
# - otherwise convert stdin
# var ed/EAX : exit-descriptor
# . reserve 8 bytes on the stack and point EAX at them
81 5/subop/subtract 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # subtract from ESP
89/copy 3/mod/direct 0/rm32/EAX . . . 4/r32/ESP . . # copy ESP to EAX
# configure ed to really exit()
# . ed->target = 0
# (mod 0 is the register-indirect mode: the immediate is stored to memory at the address in EAX, not into EAX itself)
c7 0/subop/copy 0/mod/indirect 0/rm32/EAX . . . . . 0/imm32 # copy to *EAX
# return convert(Stdin, Stdout, Stderr, ed)
# . . push args
# (pushed last-to-first, so Stdin ends up on top of the stack)
50/push-EAX/ed
68/push Stderr/imm32
68/push Stdout/imm32
68/push Stdin/imm32
# . . call
e8/call convert/disp32
# . . discard args
81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0x10/imm32 # add to ESP
# . syscall(exit, 0)
bb/copy-to-EBX 0/imm32
$main:end:
# EBX already holds the exit status chosen by whichever path got here
b8/copy-to-EAX 1/imm32/exit
cd/syscall 0x80/imm8
# - big picture
# We'll operate on each line/instruction in isolation. That way we only need to
# allocate memory for converting a single instruction.
#
# To pack an entire file:
# skip segment headers
# pack every instruction in the code segment
# skip other segments
# - To pack an instruction, following the C++ version:
# read line
# parse words
# read first word as opcode and emit
# if 0f or f2 or f3 read second opcode and emit
# if 'f2 0f' or 'f3 0f' read third opcode and emit
# scan words
# if has metadata 'mod', parse into mod
# if has metadata 'rm32', parse into rm32
# if has metadata 'r32', parse into r32
# if has metadata 'subop', parse into r32 (subop occupies the same modrm bits as r32)
# if at least one of the above was present, emit modrm byte
# scan words
# if has metadata 'base', parse into base
# if has metadata 'index', parse into index
# if has metadata 'scale', parse into scale
# if at least one of the 3 was present, emit sib byte
# parse errors => <abort>
# scan words
# if has metadata 'disp8', emit-maybe
# if has metadata 'disp16', emit-maybe as 2 bytes
# if has metadata 'disp32', emit-maybe as 4 bytes
# scan words
# if has metadata 'imm8', emit-maybe
# if has metadata 'imm32', emit-maybe as 4 bytes
# finally, emit line prefixed with a ' # '
# simplifications since we perform zero error handling (continuing to rely on the C++ version for that):
# missing fields are always 0-filled
# bytes never mentioned are silently dropped; if you don't provide /mod, /rm32 or /r32 you don't get a 0 modrm byte. You get *no* modrm byte.
# in case of conflict, last operand with a name is recognized
# silently drop extraneous operands
# unceremoniously abort on non-numeric operands except disp or imm
# primary state: line
# stream of 512 bytes; abort if it ever overflows
#
# conceptual hierarchy within a line:
# line = words separated by ' ', maybe followed by comment starting with '#'
# word = name until '/', then 0 or more metadata separated by '/'
#
# we won't bother saving the internal structure of lines; reparsing should be cheap using two primitives:
# next-token(stream, delim char) -> slice (start, end pointers)
# slice-equal?(slice, kernel string)
# helpers:
# new-stream(length int, elemsize int) -- allocate length*elemsize bytes, initialize first word with length*elemsize
# read-line(in : &buffered-file, line : stream byte, err : &buffered-file, ed : &exit-descriptor)
# next-word(line : stream byte, out : &slice)
# responsible for skipping whitespace and comments
# next-token(line : stream byte, delim : byte, out : &slice)
# return (0, 0) sentinel on hitting comment or end of array
# slice-empty?(in : &slice) -> bool
# slice-equal?(in : &slice, s : &kernel-string) -> bool
# is-hex-int(in : &slice)
# parse-hex-int(in : &slice) -> int
# emit-maybe(out : &buffered-file, n : int, width : int)
# emit-hex-int(out : &buffered-file, n : int)
# emit(out : &buffered-file, word : &slice)
# has-metadata?(word : &slice, s : &kernel-string) -> bool
convert: # in : (address buffered-file), out : (address buffered-file), err : (address buffered-file), ed : (address exit-descriptor) -> <void>
# Planned job: pack each line read from 'in' and write the result to 'out'.
#
# NOTE: the body below is only a stack-frame skeleton. It sets up EBP, tears
# it down again and returns; none of the four arguments is read and nothing
# is written. The pseudocode describes the intended implementation, not the
# current one.
#
# pseudocode:
#   line = new-stream(512, 1)
#   repeatedly
#     clear-stream(line)
#     EAX = read-line(in, line, err, ed)
#     if EAX == EOF break
#     convert-instruction(line, out, err, ed)
#   flush(out)
#
# . prolog
55/push-EBP
89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP
# . save registers
# (placeholder: no registers are saved yet)
# . restore registers
# (placeholder: nothing to restore yet)
# . epilog
89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP
5d/pop-to-EBP
c3/return
# (re)compute the bounds of the next word in the line
#
# NOTE: only the first step is implemented. The function calls
# skip-whitespace(line) and then returns; the comment check and the call to
# next-token described below are still to be written, so 'out' is never
# touched.
next-word: # line : (address stream byte), out : (address slice)
# . prolog
55/push-EBP
89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP
# . save registers
# (placeholder: no registers are saved yet)
# skip-whitespace(line)
# . . push args
# (*(EBP+8) is the first argument, 'line')
ff 6/subop/push 1/mod/*+disp8 5/rm32/EBP . . . . 8/disp8 . # push *(EBP+8)
# . . call
e8/call skip-whitespace/disp32
# . . discard args
81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP
# TODO (not yet implemented):
# if line->data[line->read] == '#' return (&line->data[line->read], &line->data[line->write])
# return next-token(line, ' ')
# . restore registers
# (placeholder: nothing to restore yet)
# . epilog
89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP
5d/pop-to-EBP
c3/return
== data
# . . vim:nowrap:textwidth=0