# Some tokenization primitives.

== code
#   instruction                     effective address                                                   register    displacement    immediate
# . op          subop               mod             rm32          base        index         scale       r32
# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes

# Extract the next run of bytes in [start, end) that differ from 'delimiter',
# skipping over any number of leading delimiters first.
# The result is written to 'out' as a pair of addresses: out->start at *out, out->end at *(out+4).
# On reaching the end of the input, 'out' is an empty interval.
# Every register this function touches is saved and restored, so the only output is through 'out'.
next-token-from-slice:  # start: (addr byte), end: (addr byte), delimiter: byte, out: (addr slice)
    # . prologue
    55/push-ebp
    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
    # . save registers
    50/push-eax
    51/push-ecx
    52/push-edx
    57/push-edi
    # load the three arguments that are needed more than once
    # . edi = out
    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           7/r32/edi   0x14/disp8      .                 # copy *(ebp+20) to edi
    # . edx = delimiter
    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           2/r32/edx   0x10/disp8      .                 # copy *(ebp+16) to edx
    # . ecx = end
    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           1/r32/ecx   0xc/disp8       .                 # copy *(ebp+12) to ecx
    # step 1: step over leading delimiters
    # eax = skip-chars-matching-in-slice(start, end, delimiter)
    # . . push args (last argument first)
    52/push-edx
    51/push-ecx
    ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           8/disp8         .                 # push *(ebp+8)
    # . . call
    e8/call  skip-chars-matching-in-slice/disp32
    # . . discard args (3 words)
    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0xc/imm32         # add to esp
    # the token begins wherever the skipping stopped
    # out->start = eax
    89/copy                         0/mod/indirect  7/rm32/edi    .           .             .           0/r32/eax   .               .                 # copy eax to *edi
    # step 2: continue from there until the next delimiter
    # eax = skip-chars-not-matching-in-slice(eax, end, delimiter)
    # . . push args (last argument first)
    52/push-edx
    51/push-ecx
    50/push-eax
    # . . call
    e8/call  skip-chars-not-matching-in-slice/disp32
    # . . discard args (3 words)
    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0xc/imm32         # add to esp
    # the token ends wherever that second scan stopped
    # out->end = eax
    89/copy                         1/mod/*+disp8   7/rm32/edi    .           .             .           0/r32/eax   4/disp8         .                 # copy eax to *(edi+4)
    # . restore registers (reverse order of the saves above)
    5f/pop-to-edi
    5a/pop-to-edx
    59/pop-to-ecx
    58/pop-to-eax
    # . epilogue
    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
    5d/pop-to-ebp
    c3/return

test-next-token-from-slice:
    # .
# . prologue
    # (body of the test whose label immediately precedes this point)
    # Checks that next-token-from-slice skips leading delimiters and returns the
    # token "ab" out of the input "  ab".
    55/push-ebp
    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
    # (eax..ecx) = "  ab"
    # The input needs two leading spaces: the checks below expect the token to
    # start at offset 2 and to end at offset 4.
    b8/copy-to-eax  "  ab"/imm32
    # the word at the literal's address is used as its length, and its bytes start 4 bytes in
    8b/copy                         0/mod/indirect  0/rm32/eax    .           .             .           1/r32/ecx   .               .                 # copy *eax to ecx
    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    0/base/eax  1/index/ecx   .           1/r32/ecx   4/disp8         .                 # copy eax+ecx+4 to ecx
    05/add-to-eax  4/imm32
    # var out/edi: slice
    # (two zeroed words on the stack; the epilogue reclaims them by resetting esp)
    68/push  0/imm32/end
    68/push  0/imm32/start
    89/copy                         3/mod/direct    7/rm32/edi    .           .             .           4/r32/esp   .               .                 # copy esp to edi
    # next-token-from-slice(eax, ecx, 0x20/space, out)
    # . . push args
    57/push-edi
    68/push  0x20/imm32
    51/push-ecx
    50/push-eax
    # . . call
    e8/call  next-token-from-slice/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0x10/imm32        # add to esp
    # out->start should be at the 'a'
    # . check-ints-equal(out->start - in->start, 2, msg)
    # . . push args
    68/push  "F - test-next-token-from-slice: start"/imm32
    68/push  2/imm32
    # . . push out->start - in->start
    # (eax still holds in->start; ecx is free to reuse now that 'end' has been passed)
    8b/copy                         0/mod/indirect  7/rm32/edi    .           .             .           1/r32/ecx   .               .                 # copy *edi to ecx
    2b/subtract                     3/mod/direct    0/rm32/eax    .           .             .           1/r32/ecx   .               .                 # subtract eax from ecx
    51/push-ecx
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0xc/imm32         # add to esp
    # out->end should be after the 'b'
    # . check-ints-equal(out->end - in->start, 4, msg)
    # . . push args
    68/push  "F - test-next-token-from-slice: end"/imm32
    68/push  4/imm32
    # . . push out->end - in->start
    8b/copy                         1/mod/*+disp8   7/rm32/edi    .           .             .           1/r32/ecx   4/disp8         .                 # copy *(edi+4) to ecx
    2b/subtract                     3/mod/direct    0/rm32/eax    .           .             .           1/r32/ecx   .               .                 # subtract eax from ecx
    51/push-ecx
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0xc/imm32         # add to esp
    # . epilogue
    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
    5d/pop-to-ebp
    c3/return

# Checks that an empty input range (start == end == 0) yields an empty token.
test-next-token-from-slice-Eof:
    # . prologue
    55/push-ebp
    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .
# copy esp to ebp (describes the instruction that ends the previous line)
    # var out/edi: slice
    # (two zeroed words on the stack; the epilogue reclaims them by resetting esp)
    68/push  0/imm32/end
    68/push  0/imm32/start
    89/copy                         3/mod/direct    7/rm32/edi    .           .             .           4/r32/esp   .               .                 # copy esp to edi
    # next-token-from-slice(0, 0, 0x20/space, out)
    # . . push args
    57/push-edi
    68/push  0x20/imm32
    68/push  0/imm32
    68/push  0/imm32
    # . . call
    e8/call  next-token-from-slice/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0x10/imm32        # add to esp
    # out should be empty
    # . check-ints-equal(out->end - out->start, 0, msg)
    # . . push args
    68/push  "F - test-next-token-from-slice-Eof"/imm32
    68/push  0/imm32
    # . . push out->end - out->start
    8b/copy                         1/mod/*+disp8   7/rm32/edi    .           .             .           1/r32/ecx   4/disp8         .                 # copy *(edi+4) to ecx
    2b/subtract                     0/mod/indirect  7/rm32/edi    .           .             .           1/r32/ecx   .               .                 # subtract *edi from ecx
    51/push-ecx
    # . . call
    e8/call  check-ints-equal/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0xc/imm32         # add to esp
    # . epilogue
    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
    5d/pop-to-ebp
    c3/return

# Checks that an input consisting only of the delimiter yields an empty token.
test-next-token-from-slice-nothing:
    # . prologue
    55/push-ebp
    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
    # (eax..ecx) = " "
    b8/copy-to-eax  " "/imm32
    # the word at the literal's address is used as its length, and its bytes start 4 bytes in
    8b/copy                         0/mod/indirect  0/rm32/eax    .           .             .           1/r32/ecx   .               .                 # copy *eax to ecx
    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    0/base/eax  1/index/ecx   .           1/r32/ecx   4/disp8         .                 # copy eax+ecx+4 to ecx
    05/add-to-eax  4/imm32
    # var out/edi: slice
    68/push  0/imm32/end
    68/push  0/imm32/start
    89/copy                         3/mod/direct    7/rm32/edi    .           .             .           4/r32/esp   .               .                 # copy esp to edi
    # next-token-from-slice(eax, ecx, 0x20/space, out)
    # . . push args
    57/push-edi
    68/push  0x20/imm32
    51/push-ecx
    50/push-eax
    # . . call
    e8/call  next-token-from-slice/disp32
    # . . discard args
    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0x10/imm32        # add to esp
    # out should be empty
    # . check-ints-equal(out->end - out->start, 0, msg)
    # . . push args
    # the failure message names this test, so a failure here is not blamed on the Eof test
    68/push  "F - test-next-token-from-slice-nothing"/imm32
    68/push  0/imm32
    # . .
push out->start - in->start 8b/copy 1/mod/*+disp8 7/rm32/edi . . . 1/r32/ecx 4/disp8 . # copy *(edi+4) to ecx 2b/subtract 0/mod/indirect 7/rm32/edi . . . 1/r32/ecx . . # subtract *edi from ecx 51/push-ecx # . . call e8/call check-ints-equal/disp32 # . . discard args 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp # . epilogue 89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp 5d/pop-to-ebp c3/return skip-chars-matching: # in: (addr stream byte), delimiter: byte # . prologue 55/push-ebp 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp # . save registers 50/push-eax 51/push-ecx 52/push-edx 53/push-ebx 56/push-esi # esi = in 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 8/disp8 . # copy *(ebp+8) to esi # ecx = in->read 8b/copy 1/mod/*+disp8 6/rm32/esi . . . 1/r32/ecx 4/disp8 . # copy *(esi+4) to ecx # ebx = in->write 8b/copy 0/mod/indirect 6/rm32/esi . . . 3/r32/ebx . . # copy *esi to ebx # edx = delimiter 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 2/r32/edx 0xc/disp8 . # copy *(ebp+12) to edx $skip-chars-matching:loop: # if (in->read >= in->write) break 39/compare 3/mod/direct 1/rm32/ecx . . . 3/r32/ebx . . # compare ecx with ebx 7d/jump-if->= $skip-chars-matching:end/disp8 # eax = in->data[in->read] 31/xor 3/mod/direct 0/rm32/eax . . . 0/r32/eax . . # clear eax 8a/copy-byte 1/mod/*+disp8 4/rm32/sib 6/base/esi 1/index/ecx . 0/r32/AL 0xc/disp8 . # copy byte at *(es