From 0ee4ff8193d7799e2c70bf0ce3faad1dba21402e Mon Sep 17 00:00:00 2001
From: Kartik Agaram
Date: Thu, 3 Jan 2019 23:11:31 -0800
Subject: 4902 - initial sketch, stage 2 of compiler

I've agonized over this for a week; high time I saved a snapshot.
---
 subx/apps/pack.subx | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 164 insertions(+)
 create mode 100644 subx/apps/pack.subx

diff --git a/subx/apps/pack.subx b/subx/apps/pack.subx
new file mode 100644
index 00000000..fa910ec7
--- /dev/null
+++ b/subx/apps/pack.subx
@@ -0,0 +1,164 @@
+# Read a text file of SubX instructions from stdin, and convert it into a list
+# of whitespace-separated ascii hex bytes on stdout, suitable to be further
+# processed by apps/hex.
+#
+# To run (from the subx/ directory):
+#   $ ./subx translate *.subx apps/pack.subx -o apps/pack
+#   $ echo '05/add-to-EAX 0x20/imm32' |./subx run apps/pack
+# Expected output:
+#   05 20 00 00 00  # 05/add-to-EAX 0x20/imm32
+# The original instruction gets included as a comment at the end of each
+# converted line.
+#
+# There's zero error-checking. For now we assume the input program is valid.
+# We'll continue to rely on the C++ version for error messages.
+#
+# Label definitions and uses are left untouched for a future 'pass'.
+
+== code
+# instruction                       effective address                                                   register    displacement    immediate
+# . op          subop               mod             rm32          base        index         scale       r32
+# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+
+# main: run tests if argv[1] == "test", otherwise convert stdin; either way finish with the exit syscall (status in EBX)
+  # . prolog
+  89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+  # - if argc > 1 and argv[1] == "test" then return run_tests()
+  # . argc > 1
+  81          7/subop/compare     1/mod/*+disp8   5/rm32/EBP    .           .             .           .           0/disp8         1/imm32           # compare *EBP
+  7e/jump-if-lesser-or-equal  $run-main/disp8
+  # . argv[1] == "test"
+  # . . push args
+  68/push  "test"/imm32
+  ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
+  # . . call
+  e8/call  kernel-string-equal/disp32
+  # . . discard args
+  81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+  # . check result
+  3d/compare-EAX  1/imm32
+  75/jump-if-not-equal  $run-main/disp8
+  # . run-tests()
+  e8/call  run-tests/disp32
+  8b/copy                         0/mod/indirect  5/rm32/.disp32            .             .           3/r32/EBX   Num-test-failures/disp32          # copy *Num-test-failures to EBX
+  eb/jump  $main:end/disp8  # EBX (number of test failures) becomes the exit status
+$run-main:
+  # - otherwise convert stdin
+  # var ed/EAX : exit-descriptor
+  81          5/subop/subtract    3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # subtract from ESP (reserve 8 bytes for ed)
+  89/copy                         3/mod/direct    0/rm32/EAX    .           .             .           4/r32/ESP   .               .                 # copy ESP to EAX
+  # configure ed to really exit()
+  # . ed->target = 0
+  c7          0/subop/copy        0/mod/indirect  0/rm32/EAX    .           .             .           .           .               0/imm32           # copy to *EAX
+  # return convert(Stdin, Stdout, Stderr, ed)
+  # . . push args (in reverse order)
+  50/push-EAX/ed
+  68/push  Stderr/imm32
+  68/push  Stdout/imm32
+  68/push  Stdin/imm32
+  # . . call
+  e8/call  convert/disp32
+  # . . discard args
+  81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0x10/imm32        # add to ESP
+  # . syscall(exit, 0)
+  bb/copy-to-EBX  0/imm32
+$main:end:
+  b8/copy-to-EAX  1/imm32/exit  # both paths arrive here with the exit status already in EBX
+  cd/syscall  0x80/imm8
+
+# - big picture
+# We'll operate on each line/instruction in isolation. That way we only need to
+# allocate memory for converting a single instruction.
+#
+# To pack an entire file:
+#   skip segment headers
+#   pack every instruction in the code segment
+#   skip other segments
+
+# - To pack an instruction, following the C++ version:
+# read line
+# parse words
+# read first word as opcode and emit
+# if 0f or f2 or f3 read second opcode and emit
+# if 'f2 0f' or 'f3 0f' read third opcode and emit
+# scan words
+#   if has metadata 'mod', parse into mod
+#   if has metadata 'rm32', parse into rm32
+#   if has metadata 'r32', parse into r32
+#   if has metadata 'subop', parse into r32 (subop occupies the same modrm bits as r32)
+# if at least one of the 3 fields (mod, rm32, r32/subop) was present, emit modrm byte
+# scan words
+#   if has metadata 'base', parse into base
+#   if has metadata 'index', parse into index
+#   if has metadata 'scale', parse into scale
+# if at least one of the 3 fields (base, index, scale) was present, emit sib byte
+# parse errors => abort (see the simplifications listed below)
+# scan words
+#   if has metadata 'disp8', emit-maybe
+#   if has metadata 'disp16', emit-maybe as 2 bytes
+#   if has metadata 'disp32', emit-maybe as 4 bytes
+# scan words
+#   if has metadata 'imm8', emit-maybe
+#   if has metadata 'imm32', emit-maybe as 4 bytes
+# finally, emit the original line prefixed with a ' # '
+
+# simplifications since we perform zero error handling (continuing to rely on the C++ version for that):
+#   missing fields are always 0-filled
+#   bytes never mentioned are silently dropped; if you don't provide /mod, /rm32 or /r32 you don't get a 0 modrm byte. You get *no* modrm byte.
+#   disp16 not recognized for now
+#   in case of conflict, last operand with a name is recognized
+#   silently drop extraneous operands
+#   unceremoniously abort on non-numeric operands except disp or imm
+
+# primary state: line
+#   stream of 512 bytes; abort if it ever overflows
+#
+# conceptual hierarchy within a line:
+#   line = words separated by ' ', maybe followed by comment starting with '#'
+#   word = name until '/', then 0 or more metadata separated by '/'
+#
+# we won't bother saving the internal structure of lines; reparsing should be cheap using two primitives:
+#   next-token(stream, delim char) -> slice (start, end pointer)
+#   slice-equal?(slice, kernel string)
+
+# helpers (planned; none of them is defined in this file yet):
+#   new-stream(length int, elemsize int) -- allocate length*elemsize bytes, initialize first word with length*elemsize
+#   clear-stream(array) -- skip length, clear length bytes after
+#   read-line(in : &buffered-file, line : stream byte, err : &buffered-file, ed : &exit-descriptor)
+#   next-word(line : stream byte, out : &slice)
+#     responsible for skipping whitespace and comments
+#   next-token(line : stream byte, delim : byte, out : &slice)
+#     return (0, 0) sentinel on hitting comment or end of array
+#   slice-empty?(in : &slice) -> bool
+#   slice-equal?(in : &slice, s : &kernel-string) -> bool
+#   is-hex-int(in : &slice)
+#   parse-hex-int(in : &slice) -> int
+#   emit-maybe(out : &buffered-file, n : int, width : int)
+#   emit-hex-int(out : &buffered-file, n : int)
+#   emit(out : &buffered-file, word : &slice)
+#   has-metadata?(word : &slice, s : &kernel-string) -> bool
+
+# the main entry point (currently a stub: only the prolog and epilog below are real instructions)
+convert:  # in : (address buffered-file), out : (address buffered-file), err : (address buffered-file), ed : (address exit-descriptor) -> (result not specified in this sketch)
+  # pseudocode (the intended behavior; not implemented yet):
+  #   line = new-stream(512, 1)
+  #   repeatedly
+  #     clear-stream(line)
+  #     EAX = read-line(in, line, err, ed)
+  #     if EAX == EOF break
+  #     convert-instruction(line, out, err, ed)
+  #   flush(out)
+  #
+  # . prolog
+  55/push-EBP
+  89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+  # . save registers (placeholder: nothing is saved yet)
+  # . restore registers (placeholder: nothing to restore yet)
+  # . epilog
+  89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
+  5d/pop-to-EBP
+  c3/return
+
+== data
+
+# . . vim:nowrap:textwidth=0
-- 
cgit 1.4.1-2-gfad0