about summary refs log tree commit diff stats
path: root/subx/apps/pack.subx
blob: bad21e0f78b6a5337bc1a2c26ffccec8a570c9fc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# Read a text file of SubX instructions from stdin, and convert it into a list
# of whitespace-separated ascii hex bytes on stdout, suitable to be further
# processed by apps/hex.
#
# To run (from the subx/ directory):
#   $ ./subx translate *.subx apps/pack.subx -o apps/pack
#   $ echo '05/add-to-EAX 0x20/imm32'  |./subx run apps/pack
# Expected output:
#   05 20 00 00 00  # 05/add-to-EAX 0x20/imm32
# The original instruction gets included as a comment at the end of each
# converted line.
#
# There's zero error-checking. For now we assume the input program is valid.
# We'll continue to rely on the C++ version for error messages.
#
# Label definitions and uses are left untouched for a future 'pass'.

== code
#   instruction                     effective address                                                   register    displacement    immediate
# . op          subop               mod             rm32          base        index         scale       r32
# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes

# main: run tests if necessary, convert stdin if not
#   Entry point; runs straight off the initial stack, so there is no caller frame to save.
#   Reads argc from *EBP and argv[1] from *(EBP+8).
#   Always ends in the exit syscall at $main:end, with the exit status in EBX:
#     - after a test run: *Num-test-failures
#     - after converting stdin: 0
    # . prolog
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # - if argc > 1 and argv[1] == "test" then return run_tests()
    # . argc > 1
    81          7/subop/compare     1/mod/*+disp8   5/rm32/EBP    .           .             .           .           0/disp8         1/imm32           # compare *EBP
    7e/jump-if-lesser-or-equal  $run-main/disp8
    # . argv[1] == "test"
    # . . push args
    68/push  "test"/imm32
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  kernel-string-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # . check result
    3d/compare-EAX  1/imm32
    75/jump-if-not-equal  $run-main/disp8
    # . run-tests()
    e8/call  run-tests/disp32
    # . exit status = *Num-test-failures
    8b/copy                         0/mod/indirect  5/rm32/.disp32            .             .           3/r32/EBX   Num-test-failures/disp32          # copy *Num-test-failures to EBX
    eb/jump  $main:end/disp8
$run-main:
    # - otherwise convert stdin
    # var ed/EAX : exit-descriptor
    # . reserve 8 bytes on the stack for ed; never reclaimed since we exit right after convert() returns
    81          5/subop/subtract    3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # subtract from ESP
    89/copy                         3/mod/direct    0/rm32/EAX    .           .             .           4/r32/ESP   .               .                 # copy ESP to EAX
    # configure ed to really exit()
    # . ed->target = 0
    # . . mod=0 is the indirect addressing mode: this stores through EAX rather than overwriting EAX
    c7          0/subop/copy        0/mod/indirect  0/rm32/EAX    .           .             .           .           .               0/imm32           # copy to *EAX
    # return convert(Stdin, Stdout, Stderr, ed)
    # . . push args (in reverse order)
    50/push-EAX/ed
    68/push  Stderr/imm32
    68/push  Stdout/imm32
    68/push  Stdin/imm32
    # . . call
    e8/call  convert/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0x10/imm32        # add to ESP
    # . syscall(exit, 0)
    bb/copy-to-EBX  0/imm32
$main:end:
    # . syscall(exit, EBX); both paths above arrive here with the exit status already in EBX
    b8/copy-to-EAX  1/imm32/exit
    cd/syscall  0x80/imm8

# - big picture
# We'll operate on each line/instruction in isolation. That way we only need to
# allocate memory for converting a single instruction.
#
# To pack an entire file:
#   skip segment headers
#   pack every instruction in the code segment
#   skip other segments

# - To pack an instruction, following the C++ version:
# read line
# parse words
# read first word as opcode and emit
# if 0f or f2 or f3 read second opcode and emit
# if 'f2 0f' or 'f3 0f' read third opcode and emit
# scan words
#   if has metadata 'mod', parse into mod
#   if has metadata 'rm32', parse into rm32
#   if has metadata 'r32', parse into r32
#   if has metadata 'subop', parse into r32
# if at least one of the above was present, emit modrm byte
# scan words
#   if has metadata 'base', parse into base
#   if has metadata 'index', parse into index
#   if has metadata 'scale', parse into scale
# if at least one of the 3 was present, emit sib byte
# parse errors => <abort>
# scan words
#   if has metadata 'disp8', emit-maybe
#   if has metadata 'disp16', emit-maybe as 2 bytes
#   if has metadata 'disp32', emit-maybe as 4 bytes
# scan words
#   if has metadata 'imm8', emit-maybe
#   if has metadata 'imm32', emit-maybe as 4 bytes
# finally, emit line prefixed with a '  # '

# simplifications since we perform zero error handling (continuing to rely on the C++ version for that):
#   missing fields are always 0-filled
#   bytes never mentioned are silently dropped; if you don't provide /mod, /rm32 or /r32 you don't get a 0 modrm byte. You get *no* modrm byte.
#   in case of conflict, last operand with a name is recognized
#   silently drop extraneous operands
#   unceremoniously abort on non-numeric operands except disp or imm

# primary state: line
#   stream of 512 bytes; abort if it ever overflows
#
# conceptual hierarchy within a line:
#   line = words separated by ' ', maybe followed by comment starting with '#'
#   word = name until '/', then 0 or more metadata separated by '/'
#
# we won't bother saving the internal structure of lines; reparsing should be cheap using two primitives:
#   next-token(stream, delim char) -> slice (start, end pointers)
#   slice-equal?(slice, kernel string)

# helpers:
#   new-stream(length int, elemsize int) -- allocate length*elemsize bytes, initialize first word with length*elemsize
#   read-line(in : &buffered-file, line : stream byte, err : &buffered-file, ed : &exit-descriptor)
#   next-word(line : stream byte, out : &slice)
#     responsible for skipping whitespace and comments
#   next-token(line : stream byte, delim : byte, out : &slice)
#     return (0, 0) sentinel on hitting comment or end of array
#   slice-empty?(in : &slice) -> bool
#   slice-equal?(in : &slice, s : &kernel-string) -> bool
#   is-hex-int(in : &slice)
#   parse-hex-int(in : &slice) -> int
#   emit-maybe(out : &buffered-file, n : int, width : int)
#   emit-hex-int(out : &buffered-file, n : int)
#   emit(out : &buffered-file, word : &slice)
#   has-metadata?(word : &slice, s : &kernel-string) -> bool

convert:  # in : (address buffered-file), out : (address buffered-file), err : (address buffered-file), ed : (address exit-descriptor) -> <void>
    # Intended to pack every line read from 'in' and write the result to 'out',
    # following the pseudocode below.
    #
    # NOTE: not implemented yet. The body currently consists only of the prolog
    # and epilog, so a call sets up a stack frame, tears it down and returns
    # without touching any of its arguments. The pseudocode describes the
    # planned behavior, not the current one.
    #
    # pseudocode:
    #   line = new-stream(512, 1)
    #   repeatedly
    #     clear-stream(line)
    #     EAX = read-line(in, line, err, ed)
    #     if EAX == EOF break
    #     convert-instruction(line, out, err, ed)
    #   flush(out)
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
    # (none saved yet)
    # TODO: body from the pseudocode above goes here
    # . restore registers
    # (none restored yet)
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# (re)compute the bounds of the next word in the line
#
# Arguments:
#   line: read from *(EBP+8) and passed on to skip-whitespace
#   out: slice meant to receive the bounds of the word; presumably at *(EBP+12) -- TODO confirm once it is used
#
# NOTE: only partially implemented. The function currently just calls
# skip-whitespace(line) and returns; 'out' is never written. The two commented
# steps after the call (comment detection and the call to next-token) are still
# to be written.
next-word:  # line : (address stream byte), out : (address slice)
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
    # (none saved yet)
    # skip-whitespace(line)
    # . . push args
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  skip-whitespace/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # TODO (not implemented yet):
    # if line->data[line->read] == '#' return (&line->data[line->read], &line->data[line->write])
    # return next-token(line, ' ')
    # . restore registers
    # (none restored yet)
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

== data

# . . vim:nowrap:textwidth=0