diff options
-rw-r--r-- | subx/apps/pack.subx | 168 |
1 files changed, 119 insertions, 49 deletions
diff --git a/subx/apps/pack.subx b/subx/apps/pack.subx index 4c5be623..c7f0ea2d 100644 --- a/subx/apps/pack.subx +++ b/subx/apps/pack.subx @@ -83,6 +83,15 @@ $main:end: # primary state: line # stream of 512 bytes; abort if it ever overflows +# conceptual hierarchy within a line: +# line = words separated by ' ', maybe followed by comment starting with '#' +# word = name until '/', then 0 or more metadata separated by '/' +# +# we won't bother saving the internal structure of lines; reparsing should be cheap using three primitives: +# next-token(stream, delim char) -> slice (start, end pointers) +# next-token-from-slice(start, end, delim char) -> slice +# slice-equal?(slice, string) + convert: # in : (address buffered-file), out : (address buffered-file) -> <void> # pseudocode: # var line = new-stream(512, 1) @@ -1555,66 +1564,127 @@ test-convert-data-multiple-words: 5d/pop-to-EBP c3/return -# - To pack an instruction, following the C++ version: -# read first word as opcode and write-slice -# if 0f or f2 or f3 read second opcode and write-slice -# if 'f2 0f' or 'f3 0f' read third opcode and write-slice -# while true -# word-slice = next-word -# if empty(word-slice) break -# if has metadata 'mod', parse into mod -# if has metadata 'rm32', parse into rm32 -# if has metadata 'r32', parse into r32 -# if has metadata 'subop', parse into r32 -# if at least one of the 3 was present, print-byte -# while true -# word-slice = next-word -# if empty(word-slice) break -# if has metadata 'base', parse into base -# if has metadata 'index', parse into index -# if has metadata 'scale', parse into scale -# if at least one of the 3 was present, print-byte -# parse errors => <abort> -# while true -# word-slice = next-word -# if empty(word-slice) break -# if has metadata 'disp8', emit as 1 byte -# if has metadata 'disp16', emit as 2 bytes -# if has metadata 'disp32', emit as 4 bytes -# while true -# word-slice = next-word -# if empty(word-slice) break -# if has metadata 'imm8', emit -# 
if has metadata 'imm32', emit as 4 bytes -# finally, emit line prefixed with a ' # ' - # simplifications since we perform zero error handling (continuing to rely on the C++ version for that): # missing fields are always 0-filled # bytes never mentioned are silently dropped; if you don't provide /mod, /rm32 or /r32 you don't get a 0 modrm byte. You get *no* modrm byte. # in case of conflict, last operand with a name is recognized # silently drop extraneous operands # unceremoniously abort on non-numeric operands except disp or imm +# opcodes must be lowercase and zero padded -# conceptual hierarchy within a line: -# line = words separated by ' ', maybe followed by comment starting with '#' -# word = name until '/', then 0 or more metadata separated by '/' -# -# we won't bother saving the internal structure of lines; reparsing should be cheap using three primitives: -# next-token(stream, delim char) -> slice (start, end pointers) -# next-token(stream, slice, delim char) -> slice' -# slice-equal?(slice, string) +# todo: end each line with original unprocessed line in a comment +# pack an instruction, following the C++ version: convert-instruction: # line : (address stream byte), out : (address buffered-file) -> <void> # pseudocode: + # var word-slice = next-word(line) + # if slice-empty?(word-slice) + # write-stream-buffered(out, line) + # return + # if slice-starts-with?(word-slice, "#") + # write-stream-buffered(out, line) + # return + # if slice-ends-with?(word-slice, ":") + # write-stream-buffered(out, line) + # return + # # convert opcodes + # var op1 = word-slice + # write-slice(out, op1) + # if slice-equal?(op1, "0f") or slice-equal?(op1, "f2") or slice-equal?(op1, "f3") + # var op2 = next-word(line) + # if slice-empty?(op2) + # return + # if slice-starts-with?(op2, "#") + # return + # write-slice(out, op2) + # if slice-equal?(op1, "f2") or slice-equal?(op1, "f3") + # if slice-equal?(op2, "0f") + # var op3 = next-word(line) + # if slice-empty?(op3) + # return + 
# if slice-starts-with?(op3, "#") + # return + # write-slice(out, op3) + # # pack modrm and sib + # var has-modrm? = false, mod = 0, rm32 = 0, r32 = 0 + # var has-sib? = false, base = 0, index = 4 (none), scale = 0 # while true - # word-slice = next-word - # if slice-empty?(word-slice) # end of file (maybe including trailing whitespace) - # write-stream-buffered(out, line) - # if slice-starts-with?(word-slice, "#") # comment - # write-stream-buffered(out, line) - # else if slice-ends-with?(word-slice, ":") # label - # write-stream-buffered(out, line) - # ... + # word-slice = next-word(line) + # if (empty(word-slice)) break + # if (slice-starts-with?(word-slice, "#")) break + # if (has-metadata?(word-slice, "mod")) + # var mod = parse-hex-int(next-token-from-slice(word-slice, "/")) + # has-modrm? = true + # else if (has-metadata?(word-slice, "rm32")) + # var rm32 = parse-hex-int(next-token-from-slice(word-slice, "/")) + # has-modrm? = true + # else if (has-metadata?(word-slice, "r32") or has-metadata?(word-slice, "subop")) + # var r32 = parse-hex-int(next-token-from-slice(word-slice, "/")) + # has-modrm? = true + # else if (has-metadata?(word-slice, "base")) + # var base = parse-hex-int(next-token-from-slice(word-slice, "/")) + # has-sib? = true + # else if (has-metadata?(word-slice, "index")) + # var index = parse-hex-int(next-token-from-slice(word-slice, "/")) + # has-sib? = true + # else if (has-metadata?(word-slice, "scale")) + # var scale = parse-hex-int(next-token-from-slice(word-slice, "/")) + # has-sib? = true + # if has-modrm? + # var modrm = mod & 0b11 + # modrm <<= 3 + # modrm |= r32 & 0b111 + # modrm <<= 3 + # modrm |= rm32 & 0b111 + # emit-hex(out, modrm, 1) + # if has-sib? 
+ # var sib = scale & 0b11 + # sib <<= 3 + # sib |= index & 0b111 + # sib <<= 3 + # sib |= base & 0b111 + # emit-hex(out, sib, 1) + # # emit disp bytes + # rewind-stream(line) + # while true + # word-slice = next-word(line) + # if (empty(word-slice)) break + # if (slice-starts-with?(word-slice, "#")) break + # if has-metadata?(word-slice, "disp8") + # var disp = parse-hex-int(next-token-from-slice(word-slice, "/")) + # emit-hex(out, disp, 1) + # break + # else if has-metadata?(word-slice, "disp16") + # var disp = parse-hex-int(next-token-from-slice(word-slice, "/")) + # emit-hex(out, disp, 2) + # break + # else if has-metadata?(word-slice, "disp32") + # var disp = parse-hex-int(next-token-from-slice(word-slice, "/")) + # emit-hex(out, disp, 4) + # break + # # emit imm bytes + # rewind-stream(line) + # while true + # word-slice = next-word(line) + # if (slice-starts-with?(word-slice, "#")) break + # if (empty(word-slice)) break + # if has-metadata?(word-slice, "imm8") + # var imm = parse-hex-int(next-token-from-slice(word-slice, "/")) + # emit-hex(out, imm, 1) + # break + # else if has-metadata?(word-slice, "imm16") + # var imm = parse-hex-int(next-token-from-slice(word-slice, "/")) + # emit-hex(out, imm, 2) + # break + # else if has-metadata?(word-slice, "imm32") + # var imm = parse-hex-int(next-token-from-slice(word-slice, "/")) + # emit-hex(out, imm, 4) + # break + # + # Exit blocks: + # pass-through: + # write-stream-buffered(out, line) + # return # # . prolog 55/push-EBP |