# Assign addresses (co-ordinates) to instructions (landmarks) in a program
# (landscape).
# Use the addresses assigned to:
#   a) replace labels
#   b) add segment headers with addresses and offsets correctly filled in
#
# To build (from the subx/ directory):
#   $ ./subx translate *.subx apps/survey.subx -o apps/survey
#
# The expected input is a stream of bytes with segment headers, comments and
# some interspersed labels.
#   $ cat x
#   == code 0x1
#   l1:
#   aa bb l1/imm8
#   cc dd l2/disp32
#   l2:
#   ee foo/imm32
#   == data 0x10
#   foo:
#     00
#
# The output is the stream of bytes without segment headers or label definitions,
# and with label references replaced with numeric values/displacements.
#
#   $ cat x  |./subx run apps/survey
#   ...ELF header bytes...
#   # ELF header above will specify that code segment begins at this offset
#   aa bb nn  # some computed address
#   cc dd nn nn nn nn  # some computed displacement
#   ee nn nn nn nn  # some computed address
#   # ELF header above will specify that data segment begins at this offset
#   00

== code
#   instruction                     effective address                                                   register    displacement    immediate
# . op          subop               mod             rm32          base        index         scale       r32
# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes

# Program entry point: set up the heap and trace stream, then either run the
# tests (when argv[1] == "test") or convert stdin to stdout and exit.
Entry:
    # Heap = new-segment(64KB)
    # . . push args
    68/push  Heap/imm32
    68/push  0x10000/imm32/64KB
    # . . call
    e8/call  new-segment/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # initialize-trace-stream(256KB)
    # . . push args
    68/push  0x40000/imm32/256KB
    # . . call
    e8/call  initialize-trace-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP

    # for debugging: run a single test
#?     e8/call test-convert-computes-addresses/disp32
#?     8b/copy                         0/mod/indirect  5/rm32/.disp32            .             .           3/r32/EBX   Num-test-failures/disp32          # copy *Num-test-failures to EBX
#?     eb/jump  $main:end/disp8

    # run tests if necessary, convert stdin if not
    # . prolog
    # (the calls above leave ESP balanced, so *EBP below is still argc)
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # - if argc > 1 and argv[1] == "test", then return run_tests()
    # . argc > 1
    81          7/subop/compare     1/mod/*+disp8   5/rm32/EBP    .           .             .           .           0/disp8         1/imm32           # compare *EBP
    7e/jump-if-lesser-or-equal  $run-main/disp8
    # . argv[1] == "test"
    # . . push args
    68/push  "test"/imm32
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  kernel-string-equal?/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # . check result
    3d/compare-EAX-and  1/imm32
    75/jump-if-not-equal  $run-main/disp8
    # . run-tests()
    e8/call  run-tests/disp32
    # exit status = number of test failures
    8b/copy                         0/mod/indirect  5/rm32/.disp32            .             .           3/r32/EBX   Num-test-failures/disp32          # copy *Num-test-failures to EBX
    eb/jump  $main:end/disp8
$run-main:
    # - otherwise convert stdin
    # convert(Stdin, Stdout)
    # (convert takes exactly two args: in and out)
    # . . push args
    68/push  Stdout/imm32
    68/push  Stdin/imm32
    # . . call
    e8/call  convert/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # . syscall(exit, 0)
    bb/copy-to-EBX  0/imm32
$main:end:
    b8/copy-to-EAX  1/imm32/exit
    cd/syscall  0x80/imm8

# data structures:
#   segment-info: {address, file-offset, size}            (12 bytes)
#   segments: (address stream {string, segment-info})     (16 bytes per row)
#   label-info: {segment-name, segment-offset, address}   (12 bytes)
#   labels: (address stream {string, label-info})         (16 bytes per row)

# Top-level driver: survey 'in' for segments and labels, assign addresses,
# then re-read 'in' and emit the final output to 'out'.
# Both streams (segments, labels) live on this function's stack frame.
convert:  # in : (address buffered-file), out : (address buffered-file) -> nothing
    # pseudocode
    #   var segments = new-stream(10 rows, 16 bytes each)
    #   var labels = new-stream(512 rows, 16 bytes each)
    #   compute-offsets(in, segments, labels)
    #   compute-addresses(segments, labels)
    #   rewind-stream(in)
    #   emit-output(in, out, segments, labels)
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
    51/push-ECX
    52/push-EDX
    # var segments/ECX = stream(10 * 16)
    # (data area first, then the 3-word stream header: length, read, write)
    81          5/subop/subtract    3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xa0/imm32        # subtract from ESP
    68/push  0xa0/imm32/length
    68/push  0/imm32/read
    68/push  0/imm32/write
    89/copy                         3/mod/direct    1/rm32/ECX    .           .             .           4/r32/ESP   .               .                 # copy ESP to ECX
    # var labels/EDX = stream(512 * 16)
    # (each row is a 4-byte string key plus a 12-byte label-info)
    81          5/subop/subtract    3/mod/direct    4/rm32/ESP    .           .             .           .           .               0x2000/imm32      # subtract from ESP
    68/push  0x2000/imm32/length
    68/push  0/imm32/read
    68/push  0/imm32/write
    89/copy                         3/mod/direct    2/rm32/EDX    .           .             .           4/r32/ESP   .               .                 # copy ESP to EDX
    # compute-offsets(in, segments, labels)
    # . . push args
    52/push-EDX
    51/push-ECX
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  compute-offsets/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # compute-addresses(segments, labels)
    # . . push args
    52/push-EDX
    51/push-ECX
    # . . call
    e8/call  compute-addresses/disp32
    # . . discard args
    # (2 args = 8 bytes; discarding more would expose the labels header to later pushes)
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # rewind-stream(in)
    # . . push args
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  rewind-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # emit-output(in, out, segments, labels)
    # . . push args
    52/push-EDX
    51/push-ECX
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           0xc/disp8       .                 # push *(EBP+12)
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  emit-output/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0x10/imm32        # add to ESP
$convert:end:
    # . reclaim locals
    # segments: 0xa0 data + 0xc header; labels: 0x2000 data + 0xc header
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0x20b8/imm32      # add to ESP
    # . restore registers
    5a/pop-to-EDX
    59/pop-to-ECX
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

test-convert-computes-addresses:
    # input:
    #   == code 0x1
    #   ab x/imm32
    #   == data 0x1000
    #   x:
    #     01
    #
    # trace contains (in any order):
    #   label x is at address 0x1079
    #   segment code starts at address 0x74
    #   segment code has size 5
    #   segment data starts at address 0x1079
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # setup
    # . clear-stream(_test-input-stream)
    # . . push args
    68/push  _test-input-stream/imm32
    # . . call
    e8/call  clear-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # . clear-stream(_test-input-buffered-file+4)
    # . . push args
    b8/copy-to-EAX  _test-input-buffered-file/imm32
    05/add-to-EAX  4/imm32
    50/push-EAX
    # . . call
    e8/call  clear-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # . clear-stream(_test-output-stream)
    # . . push args
    68/push  _test-output-stream/imm32
    # . . call
    e8/call  clear-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # . clear-stream(_test-output-buffered-file+4)
    # . . push args
    b8/copy-to-EAX  _test-output-buffered-file/imm32
    05/add-to-EAX  4/imm32
    50/push-EAX
    # . . call
    e8/call  clear-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # initialize input
    # . write(_test-input-stream, "== code 0x1\n")
    # . . push args
    68/push  "== code 0x1\n"/imm32
    68/push  _test-input-stream/imm32
    # . . call
    e8/call  write/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # . write(_test-input-stream, "ab x/imm32\n")
    # . . push args
    68/push  "ab x/imm32\n"/imm32
    68/push  _test-input-stream/imm32
    # . . call
    e8/call  write/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # . write(_test-input-stream, "== data 0x1000\n")
    # . . push args
    68/push  "== data 0x1000\n"/imm32
    68/push  _test-input-stream/imm32
    # . . call
    e8/call  write/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # . write(_test-input-stream, "x:\n")
    # . . push args
    68/push  "x:\n"/imm32
    68/push  _test-input-stream/imm32
    # . . call
    e8/call  write/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # . write(_test-input-stream, "01\n")
    # . . push args
    68/push  "01\n"/imm32
    68/push  _test-input-stream/imm32
    # . . call
    e8/call  write/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # convert(_test-input-buffered-file, _test-output-buffered-file)
    # . . push args
    68/push  _test-output-buffered-file/imm32
    68/push  _test-input-buffered-file/imm32
    # . . call
    e8/call  convert/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # check trace
    # (not yet implemented; the intended checks are listed below)
    # . check-trace-contains(Trace-stream, "label 'x' is at address 0x1079")
    # . check-trace-contains(Trace-stream, "segment 'code' starts at address 0x74")
    # . check-trace-contains(Trace-stream, "segment 'code' has size 0x5")
    # . check-trace-contains(Trace-stream, "segment 'data' starts at address 0x1079")
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# Stub: currently only sets up and tears down a stack frame.
compute-offsets:  # in : (address buffered-file), segments : (address stream {string, segment-info}), labels : (address stream {string, label-info})
    # pseudocode:
    #   TODO
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
$compute-offsets:end:
    # . reclaim locals
    # . restore registers
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# Stub: the test body is not written yet.
test-compute-offsets:
    # input:
    #   == code 0x1
    #   ab x/imm32
    #   == data 0x1000
    #   x:
    #     01
    #
    # trace contains (in any order):
    #   segment 'code' is at file offset 0
    #   segment 'code' has size 5
    #   segment 'data' is at file offset 5
    #   label 'x' is in segment data
    #   label 'x' is at offset 0
    #   segment 'data' has size 1
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # setup
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# Stub: currently only sets up and tears down a stack frame.
compute-addresses:  # segments : (address stream {string, segment-info}), labels : (address stream {string, label-info})
    # pseudocode:
    #   TODO
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
$compute-addresses:end:
    # . reclaim locals
    # . restore registers
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# Stub: the test body is not written yet.
test-compute-addresses:
    # segment-info:
    #   - {'a', 0x1000, 5, 0}
    #   - {'b', 0x500, 1, 0}
    #   - {'c', 0x5444, 12, 0}
    # label-info:
    #   - {'l1', 'code', 3, 0}
    #   - {'l2', 'data', 0, 0}
    #
    # NOTE(review): the segment names above ('a', 'b', 'c') don't match the
    # names used by the labels and the expected trace ('code', 'data') --
    # reconcile when the test is implemented.
    #
    # trace contains (in any order):
    #   segment 'code' starts at address 0x1074
    #   segment 'data' starts at address 0x579
    #   label 'l1' is at address 0x1077
    #   label 'l2' is at address 0x579
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # setup
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# Stub: currently only sets up and tears down a stack frame.
emit-output:  # in : (address buffered-file), out : (address buffered-file), segments : (address stream {string, segment-info}), labels : (address stream {string, label-info})
    # pseudocode:
    #   TODO
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
$emit-output:end:
    # . reclaim locals
    # . restore registers
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

== data

Segment-size:
  0x1000/imm32/4KB

# This block of bytes gets copied to the start of the output ELF file, with
# some fields filled in.
# http://www.sco.com/developers/gabi/latest/ch4.eheader.html
Elf_header:
  # - length
  0x34/imm32
  # - data
$e_ident:
  7f 45/E 4c/L 46/F
  01/32-bit  01/little-endian  01/file-version  00/no-os-extensions
  00 00 00 00 00 00 00 00  # 8 bytes of padding
$e_type:
  02 00
$e_machine:
  03 00
$e_version:
  1/imm32
Elf_e_entry:
  0x09000000/imm32  # approximate default; must be updated
$e_phoff:
  0x34/imm32  # offset for the 'program header table' containing segment headers
$e_shoff:
  0/imm32  # no sections
$e_flags:
  0/imm32  # unused
$e_ehsize:
  0x34 00
$e_phentsize:
  0x20 00
Elf_e_phnum:
  00 00  # number of segments; must be updated
$e_shentsize:
  00 00  # no sections
$e_shnum:
  00 00
$e_shstrndx:
  00 00

# This block of bytes gets copied after the Elf_header once for each segment.
# Some fields need filling in each time.
# https://docs.oracle.com/cd/E19683-01/816-1386/chapter6-83432/index.html
Elf_program_header_entry:
  # - length
  0x20/imm32
  # - data
$p_type:
  1/imm32/PT_LOAD
Elf_p_offset:
  0/imm32  # byte offset in the file at which a segment begins; must be updated
Elf_p_vaddr:
  0/imm32  # starting address to store the segment at before running the program
Elf_p_paddr:
  0/imm32  # should have same value as Elf_p_vaddr
Elf_p_filesz:
  0/imm32
Elf_p_memsz:
  0/imm32  # should have same value as Elf_p_filesz
Elf_p_flags:
  6/imm32/rw-  # read/write/execute permissions for the segment; must be updated for the code segment
Elf_p_align:
  # we hold this constant; changing it will require adjusting the way we
  # compute the starting address for each segment
  0x1000/imm32

# . . vim:nowrap:textwidth=0