about summary refs log tree commit diff stats
path: root/subx
diff options
context:
space:
mode:
authorKartik Agaram <vc@akkartik.com>2019-01-03 23:11:31 -0800
committerKartik Agaram <vc@akkartik.com>2019-01-03 23:51:52 -0800
commit0ee4ff8193d7799e2c70bf0ce3faad1dba21402e (patch)
tree509420c4bb5154ed4e2e496eb42a2ba08f6378fd /subx
parent634b162a980f72ae1d822e593f7975fa8e1ad3e8 (diff)
downloadmu-0ee4ff8193d7799e2c70bf0ce3faad1dba21402e.tar.gz
4902 - initial sketch, stage 2 of compiler
I've agonized over this for a week; high time I saved a snapshot.
Diffstat (limited to 'subx')
-rw-r--r--subx/apps/pack.subx164
1 files changed, 164 insertions, 0 deletions
diff --git a/subx/apps/pack.subx b/subx/apps/pack.subx
new file mode 100644
index 00000000..fa910ec7
--- /dev/null
+++ b/subx/apps/pack.subx
@@ -0,0 +1,164 @@
+# Read a text file of SubX instructions from stdin, and convert it into a list
+# of whitespace-separated ascii hex bytes on stdout, suitable to be further
+# processed by apps/hex.
+#
+# To run (from the subx/ directory):
+#   $ ./subx translate *.subx apps/pack.subx -o apps/pack
+#   $ echo '05/add-to-EAX 0x20/imm32'  |./subx run apps/pack
+# Expected output:
+#   05 20 00 00 00  # 05/add-to-EAX 0x20/imm32
+# The original instruction gets included as a comment at the end of each
+# converted line.
+#
+# There's zero error-checking. For now we assume the input program is valid.
+# We'll continue to rely on the C++ version for error messages.
+#
+# Label definitions and uses are left untouched for a future 'pass'.
+
+== code
+#   instruction                     effective address                                                   register    displacement    immediate
+# . op          subop               mod             rm32          base        index         scale       r32
+# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+
+# main: run tests if necessary, convert stdin if not
+#   if argc > 1 and argv[1] == "test": call run-tests, then exit with *Num-test-failures in EBX
+#   otherwise: call convert(Stdin, Stdout, Stderr, ed), then exit with 0 in EBX
+    # . prolog
+    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+    # - if argc > 1 and argv[1] == "test" then return run-tests()
+    # . argc > 1
+    81          7/subop/compare     1/mod/*+disp8   5/rm32/EBP    .           .             .           .           0/disp8         1/imm32           # compare *EBP
+    7e/jump-if-lesser-or-equal  $run-main/disp8
+    # . argv[1] == "test"
+    # . . push args
+    68/push  "test"/imm32
+    ff          6/subop/push        1/mod/*+disp8   5/rm32/EBP    .           .             .           .           8/disp8         .                 # push *(EBP+8)
+    # . . call
+    e8/call  kernel-string-equal/disp32
+    # . . discard args
+    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # add to ESP
+    # . check result
+    3d/compare-EAX  1/imm32
+    75/jump-if-not-equal  $run-main/disp8
+    # . run-tests()
+    e8/call  run-tests/disp32
+    # . the exit status (EBX) is the number of test failures
+    8b/copy                         0/mod/indirect  5/rm32/.disp32            .             .           3/r32/EBX   Num-test-failures/disp32          # copy *Num-test-failures to EBX
+    eb/jump  $main:end/disp8
+$run-main:
+    # - otherwise convert stdin
+    # var ed/EAX : exit-descriptor
+    # . reserve 8 bytes on the stack for ed and point EAX at them
+    81          5/subop/subtract    3/mod/direct    4/rm32/ESP    .           .             .           .           .               8/imm32           # subtract from ESP
+    89/copy                         3/mod/direct    0/rm32/EAX    .           .             .           4/r32/ESP   .               .                 # copy ESP to EAX
+    # configure ed to really exit()
+    # . ed->target = 0
+    # . (mod 0 is indirect: the store goes to the memory at EAX, not to the EAX register)
+    c7          0/subop/copy        0/mod/indirect  0/rm32/EAX    .           .             .           .           .               0/imm32           # copy to *EAX
+    # convert(Stdin, Stdout, Stderr, ed)
+    # . . push args (in reverse order)
+    50/push-EAX/ed
+    68/push  Stderr/imm32
+    68/push  Stdout/imm32
+    68/push  Stdin/imm32
+    # . . call
+    e8/call  convert/disp32
+    # . . discard args (4 args * 4 bytes)
+    81          0/subop/add         3/mod/direct    4/rm32/ESP    .           .             .           .           .               0x10/imm32        # add to ESP
+    # . syscall(exit, 0)
+    bb/copy-to-EBX  0/imm32
+$main:end:
+    # . syscall(exit, EBX); both paths above arrive here with the status already in EBX
+    b8/copy-to-EAX  1/imm32/exit
+    cd/syscall  0x80/imm8
+
+# - big picture
+# We'll operate on each line/instruction in isolation. That way we only need to
+# allocate memory for converting a single instruction.
+#
+# To pack an entire file:
+#   skip segment headers
+#   pack every instruction in the code segment
+#   skip other segments
+
+# - To pack an instruction, following the C++ version:
+# read line
+# parse words
+# read first word as opcode and emit
+# if 0f or f2 or f3 read second opcode and emit
+# if 'f2 0f' or 'f3 0f' read third opcode and emit
+# scan words
+#   if has metadata 'mod', parse into mod
+#   if has metadata 'rm32', parse into rm32
+#   if has metadata 'r32', parse into r32
+#   if has metadata 'subop', parse into r32 (subop occupies the same modrm bits as r32)
+# if at least one of the 3 fields (mod, rm32, r32/subop) was present, emit modrm byte
+# scan words
+#   if has metadata 'base', parse into base
+#   if has metadata 'index', parse into index
+#   if has metadata 'scale', parse into scale
+# if at least one of the 3 was present, emit sib byte
+# parse errors => <abort>
+# scan words
+#   if has metadata 'disp8', emit-maybe
+#   if has metadata 'disp16', emit-maybe as 2 bytes
+#   if has metadata 'disp32', emit-maybe as 4 bytes
+# scan words
+#   if has metadata 'imm8', emit-maybe
+#   if has metadata 'imm32', emit-maybe as 4 bytes
+# finally, emit line prefixed with a '  # '
+
+# simplifications since we perform zero error handling (continuing to rely on the C++ version for that):
+#   missing fields are always 0-filled
+#   bytes never mentioned are silently dropped; if you don't provide /mod, /rm32 or /r32 you don't get a 0 modrm byte. You get *no* modrm byte.
+#   disp16 not recognized for now
+#   in case of conflict, last operand with a name is recognized
+#   silently drop extraneous operands
+#   unceremoniously abort on non-numeric operands except disp or imm
+
+# primary state: line
+#   stream of 512 bytes; abort if it ever overflows
+#
+# conceptual hierarchy within a line:
+#   line = words separated by ' ', maybe followed by comment starting with '#'
+#   word = name until '/', then 0 or more metadata separated by '/'
+#
+# we won't bother saving the internal structure of lines; reparsing should be cheap using two primitives:
+#   next-token(stream, delim char) -> slice (start, end pointer)
+#   slice-equal?(slice, kernel string)
+
+# helpers:
+#   new-stream(length int, elemsize int) -- allocate length*elemsize bytes, initialize first word with length*elemsize
+#   clear-stream(array) -- skip length, clear length bytes after
+#   read-line(in : &buffered-file, line : stream byte, err : &buffered-file, ed : &exit-descriptor)
+#   next-word(line : stream byte, out : &slice)
+#     responsible for skipping whitespace and comments
+#   next-token(line : stream byte, delim : byte, out : &slice)
+#     return (0, 0) sentinel on hitting comment or end of array
+#   slice-empty?(in : &slice) -> bool
+#   slice-equal?(in : &slice, s : &kernel-string) -> bool
+#   is-hex-int(in : &slice)
+#   parse-hex-int(in : &slice) -> int
+#   emit-maybe(out : &buffered-file, n : int, width : int)
+#   emit-hex-int(out : &buffered-file, n : int)
+#   emit(out : &buffered-file, word : &slice)
+#   has-metadata?(word : &slice, s : &kernel-string) -> bool
+
+# the main entry point
+convert:  # in : (address buffered-file), out : (address buffered-file), err : (address buffered-file), ed : (address exit-descriptor) -> <void>
+    # Intended to read instructions line by line from 'in' and write their
+    # packed form to 'out', as sketched in the pseudocode below.
+    #
+    # NOTE: this is currently a stub. Only the prolog and epilog exist; none
+    # of the pseudocode is implemented yet, so a call returns immediately
+    # without reading or writing anything.
+    #
+    # pseudocode:
+    #   line = new-stream(512, 1)
+    #   repeatedly
+    #     clear-stream(line)
+    #     EAX = read-line(in, line, err, ed)
+    #     if EAX == EOF break
+    #     convert-instruction(line, out, err, ed)
+    #   flush(out)
+    #
+    # . prolog
+    55/push-EBP
+    89/copy                         3/mod/direct    5/rm32/EBP    .           .             .           4/r32/ESP   .               .                 # copy ESP to EBP
+    # . save registers
+    # (none saved yet)
+    # . restore registers
+    # (none restored yet)
+    # . epilog
+    89/copy                         3/mod/direct    4/rm32/ESP    .           .             .           5/r32/EBP   .               .                 # copy EBP to ESP
+    5d/pop-to-EBP
+    c3/return
+
+== data
+
+# . . vim:nowrap:textwidth=0