# Read a text file of SubX instructions from stdin, and convert it into a list # of whitespace-separated ascii hex bytes on stdout, suitable to be further # processed by apps/hex. # # To run (from the subx/ directory): # $ ./subx translate *.subx apps/pack.subx -o apps/pack # $ echo '05/add-to-EAX 0x20/imm32' |./subx run apps/pack # Expected output: # 05 20 00 00 00 # 05/add-to-EAX 0x20/imm32 # The original instruction gets included as a comment at the end of each # converted line. # # There's zero error-checking. For now we assume the input program is valid. # We'll continue to rely on the C++ version for error messages. # # Label definitions and uses are left untouched for a future 'pass'. == code # instruction effective address register displacement immediate # . op subop mod rm32 base index scale r32 # . 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes # main: run tests if necessary, convert stdin if not # . prolog 89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP # - if argc > 1 and argv[1] == "test" then return run_tests() # . argc > 1 81 7/subop/compare 1/mod/*+disp8 5/rm32/EBP . . . . 0/disp8 1/imm32 # compare *EBP 7e/jump-if-lesser-or-equal $run-main/disp8 # . argv[1] == "test" # . . push args 68/push "test"/imm32 ff 6/subop/push 1/mod/*+disp8 5/rm32/EBP . . . . 8/disp8 . # push *(EBP+8) # . . call e8/call kernel-string-equal/disp32 # . . discard args 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # add to ESP # . check result 3d/compare-EAX 1/imm32 75/jump-if-not-equal $run-main/disp8 # . run-tests() e8/call run-tests/disp32 8b/copy 0/mod/indirect 5/rm32/.disp32 . . 3/r32/EBX Num-test-failures/disp32 # copy *Num-test-failures to EBX eb/jump $main:end/disp8 $run-main: # - otherwise convert stdin # var ed/EAX : exit-descriptor 81 5/subop/subtract 3/mod/direct 4/rm32/ESP . . . . . 8/imm32 # subtract from ESP 89/copy 3/mod/direct 0/rm32/EAX . . . 4/r32/ESP . . # copy ESP to EAX # configure ed to really exit() # . ed->target = 0 c7 0/subop/copy 0/mod/direct 0/rm32/EAX . . . . . 0/imm32 # copy to *EAX # return convert(Stdin, 1/stdout, 2/stderr, ed) # . . push args 50/push-EAX/ed 68/push Stderr/imm32 68/push Stdout/imm32 68/push Stdin/imm32 # . . call e8/call convert/disp32 # . . discard args 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 0x10/imm32 # add to ESP # . syscall(exit, 0) bb/copy-to-EBX 0/imm32 $main:end: b8/copy-to-EAX 1/imm32/exit cd/syscall 0x80/imm8 # - big picture # We'll operate on each line/instruction in isolation. That way we only need to # allocate memory for converting a single instruction. # # To pack an entire file: # skip segment headers # pack every instruction in the code segment # skip other segments # - To pack an instruction, following the C++ version: # read line # parse words # read first word as opcode and emit # if 0f or f2 or f3 read second opcode and emit # if 'f2 0f' or 'f3 0f' read third opcode and emit # scan words # if has metadata 'mod', parse into mod # if has metadata 'rm32', parse into rm32 # if has metadata 'r32', parse into r32 # if has metadata 'subop', parse into r32 # if at least one of the 3 was present, emit modrm byte # scan words # if has metadata 'base', parse into base # if has metadata 'index', parse into index # if has metadata 'scale', parse into scale # if at least one of the 3 was present, emit sib byte # parse errors => # scan words # if has metadata 'disp8', emit-maybe # if has metadata 'disp16', emit-maybe as 2 bytes # if has metadata 'disp32', emit-maybe as 4 bytes # scan words # if has metadata 'imm8', emit-maybe # if has metadata 'imm32', emit-maybe as 4 bytes # finally, emit line prefixed with a ' # ' # simplifications since we perform zero error handling (continuing to rely on the C++ version for that): # missing fields are always 0-filled # bytes never mentioned are silently dropped; if you don't provide /mod, /rm32 or /r32 you don't get a 0 modrm byte. You get *no* modrm byte. # in case of conflict, last operand with a name is recognized # silently drop extraneous operands # unceremoniously abort on non-numeric operands except disp or imm # primary state: line # stream of 512 bytes; abort if it ever overflows # # conceptual hierarchy within a line: # line = words separated by ' ', maybe followed by comment starting with '#' # word = name until '/', then 0 or more metadata separated by '/' # # we won't bother saving the internal structure of lines; reparsing should be cheap using two primitives: # next-token(stream, delim char) -> slice (start, end pointers) # slice-equal?(slice, kernel string) # helpers: # new-stream(length int, elemsize int) -- allocate length*elemsize bytes, initialize first word with length*elemsize # read-line(in : &buffered-file, line : stream byte, err : &buffered-file, ed : &exit-descriptor) # next-word(line : stream byte, out : &slice) # responsible for skipping whitespace and comments # next-token(line : stream byte, delim : byte, out : &slice) # return (0, 0) sentinel on hitting comment or end of array # slice-empty?(in : &slice) -> bool # slice-equal?(in : &slice, s : &kernel-string) -> bool # is-hex-int(in : &slice) # parse-hex-int(in : &slice) -> int # emit-maybe(out : &buffered-file, n : int, width : int) # emit-hex-int(out : &buffered-file, n : int) # emit(out : &buffered-file, word : &slice) # has-metadata?(word : &slice, s : &kernel-string) -> bool convert: # in : (address buffered-file), out : (address buffered-file), err : (address buffered-file), ed : (address exit-descriptor) -> # pseudocode: # line = new-stream(512, 1) # repeatedly # clear-stream(line) # EAX = read-line(in, line, err, ed) # if EAX == EOF break # convert-instruction(line, out, err, ed) # flush(out) # # . prolog 55/push-EBP 89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP # . save registers # . restore registers # . epilog 89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP 5d/pop-to-EBP c3/return # (re)compute the bounds of the next word in the line next-word: # line : (address stream byte), out : (address slice) # . prolog 55/push-EBP 89/copy 3/mod/direct 5/rm32/EBP . . . 4/r32/ESP . . # copy ESP to EBP # . save registers # skip-whitespace(line) # . . push args ff 6/subop/push 1/mod/*+disp8 5/rm32/EBP . . . . 8/disp8 . # push *(EBP+8) # . . call e8/call skip-whitespace/disp32 # . . discard args 81 0/subop/add 3/mod/direct 4/rm32/ESP . . . . . 4/imm32 # add to ESP # if line->data[line->read] == '#' return (&line->data[line->read], &line->data[line->write]) # return next-token(line, ' ') # . restore registers # . epilog 89/copy 3/mod/direct 4/rm32/ESP . . . 5/r32/EBP . . # copy EBP to ESP 5d/pop-to-EBP c3/return == data # . . vim:nowrap:textwidth=0