# Read a text file of SubX instructions from stdin, and convert it into a list
# of whitespace-separated ascii hex bytes on stdout, suitable to be further
# processed by apps/hex.
#
# To run (from the subx/ directory):
#   $ ./subx translate *.subx apps/pack.subx -o apps/pack
#   $ echo '05/add-to-EAX 0x20/imm32' |./subx run apps/pack
# Expected output:
#   05 20 00 00 00 # 05/add-to-EAX 0x20/imm32
# The original instruction gets included as a comment at the end of each
# converted line.
#
# There's zero error-checking. For now we assume the input program is valid.
# We'll continue to rely on the C++ version for error messages.
#
# Label definitions and uses are left untouched for a future 'pass'.

== code
#   instruction                     effective address                                                   register    displacement    immediate
# . op          subop               mod             rm32          base        index         scale       r32
# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes

# main: run tests if necessary, convert stdin if not
#
# On entry ESP points at argc, so after the prolog *EBP is argc and *(EBP+8) is argv[1].
# The process exit status (EBX) is *Num-test-failures after a test run, and 0 after a conversion.
    # . prolog
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # - if argc > 1 and argv[1] == "test" then return run_tests()
    # . argc > 1
    81          7/subop/compare     1/mod/*+disp8   5/rm32/EBP    .           .             .           .           0/disp8         1/imm32           # compare *EBP
    7e/jump-if-lesser-or-equal  $run-main/disp8
    # . argv[1] == "test"
    # . . push args
    68/push  "test"/imm32
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  kernel-string-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # . check result
    3d/compare-EAX  1/imm32
    75/jump-if-not-equal  $run-main/disp8
    # . run-tests()
    e8/call  run-tests/disp32
    8b/copy                         0/mod/indirect  5/rm32/.disp32            .             .           3/r32/EBX   Num-test-failures/disp32          # copy *Num-test-failures to EBX
    eb/jump  $main:end/disp8
$run-main:
    # - otherwise convert stdin
    # var ed/EAX : exit-descriptor
    81          5/subop/subtract    3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # subtract from ESP
    89/copy                         3/mod/direct    0/rm32/EAX    .           .             .           4/r32/ESP   .               .                 # copy ESP to EAX
    # configure ed to really exit()
    # . ed->target = 0
    # (mod 0 is the *indirect* addressing mode: this stores through EAX)
    c7          0/subop/copy        0/mod/indirect  0/rm32/EAX    .           .             .           .           .               0/imm32           # copy to *EAX
    # convert(Stdin, Stdout, Stderr, ed)
    # . . push args
    50/push-EAX/ed
    68/push  Stderr/imm32
    68/push  Stdout/imm32
    68/push  Stdin/imm32
    # . . call
    e8/call  convert/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0x10/imm32        # add to ESP
    # . syscall(exit, 0)
    bb/copy-to-EBX  0/imm32
$main:end:
    b8/copy-to-EAX  1/imm32/exit
    cd/syscall  0x80/imm8

# - big picture
# We'll operate on each line/instruction in isolation. That way we only need to
# allocate memory for converting a single instruction.
#
# To pack an entire file:
#   skip segment headers
#   pack every instruction in the code segment
#   skip other segments

# - To pack an instruction, following the C++ version:
# read line
# parse words
# read first word as opcode and emit
# if 0f or f2 or f3 read second opcode and emit
# if 'f2 0f' or 'f3 0f' read third opcode and emit
# scan words
#   if has metadata 'mod', parse into mod
#   if has metadata 'rm32', parse into rm32
#   if has metadata 'r32', parse into r32
#   if has metadata 'subop', parse into r32
# if at least one of the 3 was present, emit modrm byte
# scan words
#   if has metadata 'base', parse into base
#   if has metadata 'index', parse into index
#   if has metadata 'scale', parse into scale
# if at least one of the 3 was present, emit sib byte
# parse errors =>
# scan words
#   if has metadata 'disp8', emit-maybe
#   if has metadata 'disp16', emit-maybe as 2 bytes
#   if has metadata 'disp32', emit-maybe as 4 bytes
# scan words
#   if has metadata 'imm8', emit-maybe
#   if has metadata 'imm32', emit-maybe as 4 bytes
# finally, emit line prefixed with a ' # '

# simplifications since we perform zero error handling (continuing to rely on the C++ version for that):
#   missing fields are always 0-filled
#   bytes never mentioned are silently dropped; if you don't provide /mod, /rm32 or /r32 you don't get a 0 modrm byte. You get *no* modrm byte.
#   in case of conflict, last operand with a name is recognized
#   silently drop extraneous operands
#   unceremoniously abort on non-numeric operands except disp or imm

# primary state: line
#   stream of 512 bytes; abort if it ever overflows
#
# conceptual hierarchy within a line:
#   line = words separated by ' ', maybe followed by comment starting with '#'
#   word = name until '/', then 0 or more metadata separated by '/'
#
# we won't bother saving the internal structure of lines; reparsing should be cheap using two primitives:
#   next-token(stream, delim char) -> slice (start, end pointers)
#   slice-equal?(slice, kernel string)

# helpers:
#   emit-maybe(out : &buffered-file, n : int, width : int)
#   emit(out : &buffered-file, word : &slice)
#   has-metadata?(word : &slice, s : &kernel-string) -> bool

convert:  # in : (address buffered-file), out : (address buffered-file), err : (address buffered-file), ed : (address exit-descriptor) -> void
    # pseudocode:
    #   line = new-stream(512, 1)
    #   repeatedly
    #     clear-stream(line)
    #     EAX = read-line(in, line, err, ed)
    #     if EAX == EOF break
    #     convert-instruction(line, out, err, ed)
    #   flush(out)
    #
    # NOTE: the pseudocode above is not implemented yet. The body below only
    # sets up and tears down the stack frame, so calling convert currently
    # produces no output.
    #
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
    # . restore registers
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# (re)compute the bounds of the next word in the line
#
# Skips leading spaces in 'line', then writes the bounds of the following word
# to 'out' (out->start at *out, out->end at *(out+4)).
# If the word starts with '#', the rest of the line up to line->write is
# returned as a single word and the stream is left fully consumed
# (line->read == line->write).
# Stream fields as used below: write at +0, read at +4, data at +12.
# All registers used (EAX, ECX, ESI, EDI) are saved and restored.
next-word:  # line : (address stream byte), out : (address slice)
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # . save registers
    50/push-EAX
    51/push-ECX
    56/push-ESI
    57/push-EDI
    # ESI = line
    8b/copy                         1/mod/*+disp8   5/rm32/EBP    .           .             .           6/r32/ESI   8/disp8         .                 # copy *(EBP+8) to ESI
    # EDI = out
    8b/copy                         1/mod/*+disp8   5/rm32/EBP    .           .             .           7/r32/EDI   0xc/disp8       .                 # copy *(EBP+12) to EDI
    # skip-chars-matching(line, ' ')
    # . . push args
    68/push  0x20/imm32/space
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  skip-chars-matching/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # out->start = &line->data[line->read]
    8b/copy                         1/mod/*+disp8   6/rm32/ESI    .           .             .           1/r32/ECX   4/disp8         .                 # copy *(ESI+4) to ECX
    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    6/base/ESI  1/index/ECX   .           0/r32/EAX   0xc/disp8       .                 # copy ESI+ECX+12 to EAX
    89/copy                         0/mod/indirect  7/rm32/EDI    .           .             .           0/r32/EAX   .               .                 # copy EAX to *EDI
    # if line->data[line->read] == '#': out->end = &line->data[line->write], skip rest of stream and return
    # . EAX = line->data[line->read]
    31/xor                          3/mod/direct    0/rm32/EAX    .           .             .           0/r32/EAX   .               .                 # clear EAX
    8a/copy-byte                    1/mod/*+disp8   4/rm32/sib    6/base/ESI  1/index/ECX   .           0/r32/AL    0xc/disp8       .                 # copy byte at *(ESI+ECX+12) to AL
    # . compare
    3d/compare-EAX-with  0x23/imm32/pound
    75/jump-if-not-equal  $next-word:not-comment/disp8
    # . out->end = &line->data[line->write]
    8b/copy                         0/mod/indirect  6/rm32/ESI    .           .             .           0/r32/EAX   .               .                 # copy *ESI to EAX
    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    6/base/ESI  0/index/EAX   .           0/r32/EAX   0xc/disp8       .                 # copy ESI+EAX+12 to EAX
    89/copy                         1/mod/*+disp8   7/rm32/EDI    .           .             .           0/r32/EAX   4/disp8         .                 # copy EAX to *(EDI+4)
    # . line->read = line->write
    # EAX currently holds an *address* (out->end), so reload the write index
    # before storing it; otherwise line->read would be set to a pointer.
    8b/copy                         0/mod/indirect  6/rm32/ESI    .           .             .           0/r32/EAX   .               .                 # copy *ESI to EAX
    89/copy                         1/mod/*+disp8   6/rm32/ESI    .           .             .           0/r32/EAX   4/disp8         .                 # copy EAX to *(ESI+4)
    # . return
    eb/jump  $next-word:end/disp8
$next-word:not-comment:
    # otherwise skip-chars-not-matching(line, ' ')
    # . . push args
    68/push  0x20/imm32/space
    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
    # . . call
    e8/call  skip-chars-not-matching/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # out->end = &line->data[line->read]
    8b/copy                         1/mod/*+disp8   6/rm32/ESI    .           .             .           1/r32/ECX   4/disp8         .                 # copy *(ESI+4) to ECX
    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    6/base/ESI  1/index/ECX   .           0/r32/EAX   0xc/disp8       .                 # copy ESI+ECX+12 to EAX
    89/copy                         1/mod/*+disp8   7/rm32/EDI    .           .             .           0/r32/EAX   4/disp8         .                 # copy EAX to *(EDI+4)
$next-word:end:
    # . restore registers
    5f/pop-to-EDI
    5e/pop-to-ESI
    59/pop-to-ECX
    58/pop-to-EAX
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# next-word skips leading spaces and returns the bounds of a plain word.
# The input needs two leading spaces for the expected data offsets 2 and 4.
test-next-word:
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # setup
    # . clear-stream(_test-stream)
    # . . push args
    68/push  _test-stream/imm32
    # . . call
    e8/call  clear-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # var slice/ECX = {0, 0}  (lives on the stack; reclaimed by the epilog)
    68/push  0/imm32/end
    68/push  0/imm32/start
    89/copy                         3/mod/direct    1/rm32/ECX    .           .             .           4/r32/ESP   .               .                 # copy ESP to ECX
    # write(_test-stream, "  ab")
    # . . push args
    68/push  "  ab"/imm32
    68/push  _test-stream/imm32
    # . . call
    e8/call  write/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # next-word(_test-stream, slice)
    # . . push args
    51/push-ECX
    68/push  _test-stream/imm32
    # . . call
    e8/call  next-word/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # check-ints-equal(slice->start - _test-stream->data, 2, msg)
    # . check-ints-equal(slice->start - _test-stream, 14, msg)
    # . . push args
    68/push  "F - test-next-word: start"/imm32
    68/push  0xe/imm32
    # . . push slice->start - _test-stream
    8b/copy                         0/mod/indirect  1/rm32/ECX    .           .             .           0/r32/EAX   .               .                 # copy *ECX to EAX
    81          5/subop/subtract    3/mod/direct    0/rm32/EAX    .           .             .           .           .               _test-stream/imm32  # subtract from EAX
    50/push-EAX
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # check-ints-equal(slice->end - _test-stream->data, 4, msg)
    # . check-ints-equal(slice->end - _test-stream, 16, msg)
    # . . push args
    68/push  "F - test-next-word: end"/imm32
    68/push  0x10/imm32
    # . . push slice->end - _test-stream
    8b/copy                         1/mod/*+disp8   1/rm32/ECX    .           .             .           0/r32/EAX   4/disp8         .                 # copy *(ECX+4) to EAX
    81          5/subop/subtract    3/mod/direct    0/rm32/EAX    .           .             .           .           .               _test-stream/imm32  # subtract from EAX
    50/push-EAX
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

# A word starting with '#' extends to the end of the line, and the stream is
# left fully consumed. The input is 5 bytes long with two leading spaces, so
# the expected data offsets are 2 (start) and 5 (end).
test-next-word-returns-whole-comment:
    # . prolog
    55/push-EBP
    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
    # setup
    # . clear-stream(_test-stream)
    # . . push args
    68/push  _test-stream/imm32
    # . . call
    e8/call  clear-stream/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               4/imm32           # add to ESP
    # var slice/ECX = {0, 0}  (lives on the stack; reclaimed by the epilog)
    68/push  0/imm32/end
    68/push  0/imm32/start
    89/copy                         3/mod/direct    1/rm32/ECX    .           .             .           4/r32/ESP   .               .                 # copy ESP to ECX
    # write(_test-stream, "  # a")
    # . . push args
    68/push  "  # a"/imm32
    68/push  _test-stream/imm32
    # . . call
    e8/call  write/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # next-word(_test-stream, slice)
    # . . push args
    51/push-ECX
    68/push  _test-stream/imm32
    # . . call
    e8/call  next-word/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
    # check-ints-equal(slice->start - _test-stream->data, 2, msg)
    # . check-ints-equal(slice->start - _test-stream, 14, msg)
    # . . push args
    68/push  "F - test-next-word-returns-whole-comment: start"/imm32
    68/push  0xe/imm32
    # . . push slice->start - _test-stream
    8b/copy                         0/mod/indirect  1/rm32/ECX    .           .             .           0/r32/EAX   .               .                 # copy *ECX to EAX
    81          5/subop/subtract    3/mod/direct    0/rm32/EAX    .           .             .           .           .               _test-stream/imm32  # subtract from EAX
    50/push-EAX
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # check-ints-equal(slice->end - _test-stream->data, 5, msg)
    # . check-ints-equal(slice->end - _test-stream, 17, msg)
    # . . push args
    68/push  "F - test-next-word-returns-whole-comment: end"/imm32
    68/push  0x11/imm32
    # . . push slice->end - _test-stream
    8b/copy                         1/mod/*+disp8   1/rm32/ECX    .           .             .           0/r32/EAX   4/disp8         .                 # copy *(ECX+4) to EAX
    81          5/subop/subtract    3/mod/direct    0/rm32/EAX    .           .             .           .           .               _test-stream/imm32  # subtract from EAX
    50/push-EAX
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # check-ints-equal(_test-stream->read, 5, msg)
    # the whole comment must be consumed, i.e. read index == write index == 5
    # . . push args
    68/push  "F - test-next-word-returns-whole-comment: read"/imm32
    68/push  5/imm32
    # . . push _test-stream->read
    b8/copy-to-EAX  _test-stream/imm32
    8b/copy                         1/mod/*+disp8   0/rm32/EAX    .           .             .           0/r32/EAX   4/disp8         .                 # copy *(EAX+4) to EAX
    50/push-EAX
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0xc/imm32         # add to ESP
    # . epilog
    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
    5d/pop-to-EBP
    c3/return

== data

# . . vim:nowrap:textwidth=0