From c09c91e185aad8e01b570c2569e13c1e429d2f99 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sun, 5 Jul 2020 12:13:28 -0700 Subject: 6612 - reorganize layers --- 111read.subx | 438 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 438 insertions(+) create mode 100644 111read.subx (limited to '111read.subx') diff --git a/111read.subx b/111read.subx new file mode 100644 index 00000000..1f5a557d --- /dev/null +++ b/111read.subx @@ -0,0 +1,438 @@ +# read: analogously to write, support reading from in-memory streams in +# addition to file descriptors. +# +# We can pass it either a file descriptor or an address to a stream. If a +# file descriptor is passed in, we _read from it using the right syscall. If a +# stream is passed in (a fake file descriptor), we read from it instead. This +# lets us initialize input for tests. +# +# A little counter-intuitively, the output of 'read' ends up in.. a stream. So +# tests end up doing a redundant copy. Why? Well, consider the alternatives: +# +# a) Reading into a string, and returning a pointer to the end of the read +# region, or a count of bytes written. Now this count or end pointer must be +# managed separately by the caller, which can be error-prone. +# +# b) Having 'read' return a buffer that it allocates. But there's no way to +# know in advance how large to make the buffer. If you read less than the +# size of the buffer you again end up needing to manage initialized vs +# uninitialized memory. +# +# c) Creating more helpful variants like 'read-byte' or 'read-until' which +# also can take a file descriptor or stream, just like 'write'. But such +# primitives don't exist in the Linux kernel, so we'd be implementing them +# somehow, either with more internal buffering or by making multiple +# syscalls. +# +# Reading into a stream avoids these problems. The buffer is externally +# provided and the caller has control over where it's allocated, its lifetime, +# and so on. The buffer's read and write pointers are internal to it so it's +# easier to keep in a consistent state. And it can now be passed directly to +# helpers like 'read-byte' or 'read-until' that only need to support streams, +# never file descriptors. +# +# Like with 'write', we assume our data segment will never begin at an address +# shorter than 0x08000000, so any smaller arguments are assumed to be real +# file descriptors. +# +# As a reminder, a stream looks like this: +# write: int # index at which to write to next +# read: int # index at which to read next +# data: (array byte) # prefixed by size as usual + +== code +# instruction effective address register displacement immediate +# . op subop mod rm32 base index scale r32 +# . 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes + +read: # f: fd or (addr stream byte), s: (addr stream byte) -> num-bytes-read/eax: int + # . prologue + 55/push-ebp + 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp + # if (f < 0x08000000) return _read(f, s) # f can't be a user-mode address, so treat it as a kernel file descriptor + 81 7/subop/compare 1/mod/*+disp8 5/rm32/ebp . . . . 8/disp8 0x08000000/imm32 # compare *(ebp+8) + 73/jump-if-addr>= $read:fake/disp8 + # . . push args + ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 . # push *(ebp+12) + ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 8/disp8 . # push *(ebp+8) + # . . call + e8/call _read/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # return + eb/jump $read:end/disp8 +$read:fake: + # otherwise, treat 'f' as a stream to scan from + # . save registers + 56/push-esi + 57/push-edi + # esi = f + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 8/disp8 . # copy *(ebp+8) to esi + # edi = s + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 7/r32/edi 0xc/disp8 . # copy *(ebp+12) to esi + # eax = _buffer-4(out = &s->data[s->write], outend = &s->data[s->size], + # in = &f->data[f->read], inend = &f->data[f->write]) + # . . push &f->data[f->write] + 8b/copy 0/mod/indirect 6/rm32/esi . . . 0/r32/eax . . # copy *esi to eax + 8d/copy-address 1/mod/*+disp8 4/rm32/sib 6/base/esi 0/index/eax . 0/r32/eax 0xc/disp8 . # copy esi+eax+12 to eax + 50/push-eax + # . . push &f->data[f->read] + 8b/copy 1/mod/*+disp8 6/rm32/esi . . . 0/r32/eax 4/disp8 . # copy *(esi+4) to eax + 8d/copy-address 1/mod/*+disp8 4/rm32/sib 6/base/esi 0/index/eax . 0/r32/eax 0xc/disp8 . # copy esi+eax+12 to eax + 50/push-eax + # . . push &s->data[s->size] + 8b/copy 1/mod/*+disp8 7/rm32/edi . . . 0/r32/eax 8/disp8 . # copy *(edi+8) to eax + 8d/copy-address 1/mod/*+disp8 4/rm32/sib 7/base/edi 0/index/eax . 0/r32/eax 0xc/disp8 . # copy edi+eax+12 to eax + 50/push-eax + # . . push &s->data[s->write] + 8b/copy 0/mod/indirect 7/rm32/edi . . . 0/r32/eax . . # copy *edi to eax + 8d/copy-address 1/mod/*+disp8 4/rm32/sib 7/base/edi 0/index/eax . 0/r32/eax 0xc/disp8 . # copy edi+eax+12 to eax + 50/push-eax + # . . call + e8/call _buffer-4/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0x10/imm32 # add to esp + # s->write += eax + 01/add 0/mod/indirect 7/rm32/edi . . . 0/r32/eax . . # add eax to *edi + # f->read += eax + 01/add 1/mod/*+disp8 6/rm32/esi . . . 0/r32/eax 4/disp8 . # add eax to *(esi+4) + # . restore registers + 5f/pop-to-edi + 5e/pop-to-esi +$read:end: + # . epilogue + 89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp + 5d/pop-to-ebp + c3/return + +# - helpers + +# '_buffer' is like '_append', but silently stops instead of aborting when it runs out of space + +# 3-argument variant of _buffer +_buffer-3: # out: address, outend: address, s: (array byte) -> num_bytes_buffered/eax + # . prologue + 55/push-ebp + 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp + # . save registers + 51/push-ecx + # eax = _buffer-4(out, outend, &s->data[0], &s->data[s->size]) + # . . push &s->data[s->size] + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . 0/r32/eax 0x10/disp8 . # copy *(ebp+16) to eax + 8b/copy 0/mod/indirect 0/rm32/eax . . . 1/r32/ecx . . # copy *eax to ecx + 8d/copy-address 1/mod/*+disp8 4/rm32/sib 0/base/eax 1/index/ecx . 1/r32/ecx 4/disp8 . # copy eax+ecx+4 to ecx + 51/push-ecx + # . . push &s->data[0] + 8d/copy-address 1/mod/*+disp8 0/rm32/eax . . . 1/r32/ecx 4/disp8 . # copy eax+4 to ecx + 51/push-ecx + # . . push outend + ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 . # push *(ebp+12) + # . . push out + ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 8/disp8 . # push *(ebp+8) + # . . call + e8/call _buffer-4/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0x10/imm32 # add to esp +$_buffer-3:end: + # . restore registers + 59/pop-to-ecx + # . epilogue + 89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp + 5d/pop-to-ebp + c3/return + +# 4-argument variant of _buffer +_buffer-4: # out: address, outend: address, in: address, inend: address -> num_bytes_buffered/eax + # . prologue + 55/push-ebp + 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp + # . save registers + 51/push-ecx + 52/push-edx + 53/push-ebx + 56/push-esi + 57/push-edi + # eax/num_bytes_buffered = 0 + b8/copy-to-eax 0/imm32 + # edi = out + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 7/r32/edi 8/disp8 . # copy *(ebp+8) to edi + # edx = outend + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 2/r32/edx 0xc/disp8 . # copy *(ebp+12) to edx + # esi = in + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 0x10/disp8 . # copy *(ebp+16) to esi + # ecx = inend + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 1/r32/ecx 0x14/disp8 . # copy *(ebp+20) to ecx +$_buffer-4:loop: + # if (in >= inend) break + 39/compare 3/mod/direct 6/rm32/esi . . . 1/r32/ecx . . # compare esi with ecx + 73/jump-if-addr>= $_buffer-4:end/disp8 + # if (out >= outend) break # for now silently ignore filled up buffer + 39/compare 3/mod/direct 7/rm32/edi . . . 2/r32/edx . . # compare edi with edx + 73/jump-if-addr>= $_buffer-4:end/disp8 + # *out = *in + 8a/copy-byte 0/mod/indirect 6/rm32/esi . . . 3/r32/BL . . # copy byte at *esi to BL + 88/copy-byte 0/mod/indirect 7/rm32/edi . . . 3/r32/BL . . # copy byte at BL to *edi + # ++num_bytes_buffered + 40/increment-eax + # ++in + 46/increment-esi + # ++out + 47/increment-edi + eb/jump $_buffer-4:loop/disp8 +$_buffer-4:end: + # . restore registers + 5f/pop-to-edi + 5e/pop-to-esi + 5b/pop-to-ebx + 5a/pop-to-edx + 59/pop-to-ecx + # . epilogue + 89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp + 5d/pop-to-ebp + c3/return + +# idea: a clear-if-empty method on streams that clears only if f->read == f->write +# Unclear how I'd use it, though. Callers seem to need the check anyway. +# Maybe a better helper would be 'empty-stream?' + +_read: # fd: int, s: (addr stream byte) -> num-bytes-read/eax: int + # . prologue + 55/push-ebp + 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp + # . save registers + 51/push-ecx + 52/push-edx + 53/push-ebx + 56/push-esi + # esi = s + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 0xc/disp8 . # copy *(ebp+12) to esi + # eax = s->write + 8b/copy 0/mod/indirect 6/rm32/esi . . . 0/r32/eax . . # copy *esi to eax + # edx = s->size + 8b/copy 1/mod/*+disp8 6/rm32/esi . . . 2/r32/edx 8/disp8 . # copy *(esi+8) to edx + # syscall(read, fd, &s->data[s->write], s->size - s->write) + # . . fd: ebx + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 3/r32/ebx 8/disp8 . # copy *(ebp+8) to ebx + # . . data: ecx = &s->data[s->write] + 8d/copy-address 1/mod/*+disp8 4/rm32/sib 6/base/esi 0/index/eax . 1/r32/ecx 0xc/disp8 . # copy esi+eax+12 to ecx + # . . size: edx = s->size - s->write + 29/subtract 3/mod/direct 2/rm32/edx . . . 0/r32/eax . . # subtract eax from edx + # . . syscall + e8/call syscall_read/disp32 + # add the result eax to s->write + 01/add 0/mod/indirect 6/rm32/esi . . . 0/r32/eax . . # add eax to *esi +$_read:end: + # . restore registers + 5e/pop-to-esi + 5b/pop-to-ebx + 5a/pop-to-edx + 59/pop-to-ecx + # . epilogue + 89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp + 5d/pop-to-ebp + c3/return + + # Two options: + # 1 (what we have above): + # ecx = s + # eax = s->write + # edx = s->size + # # syscall + # ecx = lea ecx+eax+12 + # edx = sub edx eax + # + # 2: + # ecx = s + # edx = s->size + # ecx = &s->data + # # syscall + # ecx = add ecx, s->write + # edx = sub edx, s->write + # + # Not much to choose between the two? Option 2 performs a duplicate load to + # use one less register, but doesn't increase the amount of spilling (ecx + # and edx must be used, and eax must be clobbered anyway). + +# - tests + +test-read-single: + # - write a single character into _test-stream, then read from it + # clear-stream(_test-stream) + # . . push args + 68/push _test-stream/imm32 + # . . call + e8/call clear-stream/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp + # clear-stream(_test-tmp-stream) + # . . push args + 68/push _test-tmp-stream/imm32 + # . . call + e8/call clear-stream/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp + # write(_test-stream, "Ab") + # . . push args + 68/push "Ab"/imm32 + 68/push _test-stream/imm32 + # . . call + e8/call write/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # eax = read(_test-stream, _test-tmp-stream) + # . . push args + 68/push _test-tmp-stream/imm32 + 68/push _test-stream/imm32 + # . . call + e8/call read/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # check-ints-equal(eax, 2, msg) + # . . push args + 68/push "F - test-read-single: return value"/imm32 + 68/push 2/imm32 + 50/push-eax + # . . call + e8/call check-ints-equal/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp + # check-stream-equal(_test-tmp-stream, "Ab", msg) + # . . push args + 68/push "F - test-read-single"/imm32 + 68/push "Ab"/imm32 + 68/push _test-tmp-stream/imm32 + # . . call + e8/call check-stream-equal/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp + # end + c3/return + +test-read-is-stateful: + # - make two consecutive reads, check that their results are appended + # clear-stream(_test-stream) + # . . push args + 68/push _test-stream/imm32 + # . . call + e8/call clear-stream/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp + # clear-stream(_test-tmp-stream) + # . . push args + 68/push _test-tmp-stream/imm32 + # . . call + e8/call clear-stream/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp + # write(_test-stream, "C") + # . . push args + 68/push "C"/imm32 + 68/push _test-stream/imm32 + # . . call + e8/call write/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # read(_test-stream, _test-tmp-stream) + # . . push args + 68/push _test-tmp-stream/imm32 + 68/push _test-stream/imm32 + # . . call + e8/call read/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # write(_test-stream, "D") + # . . push args + 68/push "D"/imm32 + 68/push _test-stream/imm32 + # . . call + e8/call write/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # read(_test-stream, _test-tmp-stream) + # . . push args + 68/push _test-tmp-stream/imm32 + 68/push _test-stream/imm32 + # . . call + e8/call read/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # check-stream-equal(_test-tmp-stream, "CD", msg) + # . . push args + 68/push "F - test-read-is-stateful"/imm32 + 68/push "CD"/imm32 + 68/push _test-tmp-stream/imm32 + # . . call + e8/call check-stream-equal/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp + # end + c3/return + +test-read-returns-0-on-end-of-file: + # - read after hitting end-of-file, check that result is 0 + # setup + # . clear-stream(_test-stream) + # . . push args + 68/push _test-stream/imm32 + # . . call + e8/call clear-stream/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp + # . clear-stream(_test-tmp-stream) + # . . push args + 68/push _test-tmp-stream/imm32 + # . . call + e8/call clear-stream/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp + # . write(_test-stream, "Ab") + # . . push args + 68/push "Ab"/imm32 + 68/push _test-stream/imm32 + # . . call + e8/call write/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # first read gets to end-of-file + # . read(_test-stream, _test-tmp-stream) + # . . push args + 68/push _test-tmp-stream/imm32 + 68/push _test-stream/imm32 + # . . call + e8/call read/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # second read + # . read(_test-stream, _test-tmp-stream) + # . . push args + 68/push _test-tmp-stream/imm32 + 68/push _test-stream/imm32 + # . . call + e8/call read/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # check-ints-equal(eax, 0, msg) + # . . push args + 68/push "F - test-read-returns-0-on-end-of-file"/imm32 + 68/push 0/imm32 + 50/push-eax + # . . call + e8/call check-ints-equal/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp + # end + c3/return + +== data + +_test-tmp-stream: # (stream byte) + # current write index + 0/imm32 + # current read index + 0/imm32 + # size + 8/imm32 + # data + 00 00 00 00 00 00 00 00 # 8 bytes + +# . . vim:nowrap:textwidth=0 -- cgit 1.4.1-2-gfad0