From b625c6304eca827c04eda719fa6f7927294b80bc Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Thu, 29 Jul 2021 20:07:13 -0700 Subject: support non-line-oriented processing in next-word Immediately this simplifies support for comments in image data. --- 127next-word.subx | 37 ++++++++++++++++++++++++++----------- 400.mu | 4 ++-- 511image.mu | 31 +++++++++++++++++++------------ linux/127next-word.subx | 37 ++++++++++++++++++++++++++----------- linux/assort | Bin 46755 -> 46801 bytes linux/braces | Bin 48808 -> 48854 bytes linux/calls | Bin 53791 -> 53837 bytes linux/dquotes | Bin 50377 -> 50423 bytes linux/hex | Bin 48935 -> 48981 bytes linux/labels_baremetal | Bin 51495 -> 51541 bytes linux/mu | Bin 608842 -> 608888 bytes linux/pack | Bin 59765 -> 59811 bytes linux/sigils | Bin 61162 -> 61208 bytes linux/survey_baremetal | Bin 47436 -> 47482 bytes linux/survey_elf | Bin 56329 -> 56375 bytes linux/tests | Bin 45546 -> 45592 bytes 16 files changed, 73 insertions(+), 36 deletions(-) diff --git a/127next-word.subx b/127next-word.subx index 5af326d4..17f9e88c 100644 --- a/127next-word.subx +++ b/127next-word.subx @@ -38,29 +38,44 @@ $next-word:check0: # . return out c7 0/subop/copy 0/mod/direct 7/rm32/edi . . . . . 0/imm32 # copy to *edi c7 0/subop/copy 1/mod/*+disp8 7/rm32/edi . . . . 4/disp8 0/imm32 # copy to *(edi+4) - eb/jump $next-word:end/disp8 + e9/jump $next-word:end/disp32 $next-word:check-for-comment: # out->start = &line->data[line->read] 8b/copy 1/mod/*+disp8 6/rm32/esi . . . 1/r32/ecx 4/disp8 . # copy *(esi+4) to ecx 8d/copy-address 1/mod/*+disp8 4/rm32/sib 6/base/esi 1/index/ecx . 0/r32/eax 0xc/disp8 . # copy esi+ecx+12 to eax 89/copy 0/mod/indirect 7/rm32/edi . . . 0/r32/eax . . # copy eax to *edi - # if (line->data[line->read] == '#') out->end = &line->data[line->write]), skip rest of stream and return + # if (line->data[line->read] == '#') return rest of line # . eax = line->data[line->read] 31/xor 3/mod/direct 0/rm32/eax . . . 0/r32/eax . . # clear eax 8a/copy-byte 1/mod/*+disp8 4/rm32/sib 6/base/esi 1/index/ecx . 0/r32/AL 0xc/disp8 . # copy byte at *(esi+ecx+12) to AL # . compare 3d/compare-eax-and 0x23/imm32/pound - 75/jump-if-!= $next-word:regular-word/disp8 + 0f 85/jump-if-!= $next-word:regular-word/disp32 $next-word:comment: - # . out->end = &line->data[line->write] - 8b/copy 0/mod/indirect 6/rm32/esi . . . 0/r32/eax . . # copy *esi to eax - 8d/copy-address 1/mod/*+disp8 4/rm32/sib 6/base/esi 0/index/eax . 0/r32/eax 0xc/disp8 . # copy esi+eax+12 to eax + # out->end = out->start + 8d/copy-address 1/mod/*+disp8 4/rm32/sib 6/base/esi 1/index/ecx . 0/r32/eax 0xc/disp8 . # copy esi+ecx+12 to eax 89/copy 1/mod/*+disp8 7/rm32/edi . . . 0/r32/eax 4/disp8 . # copy eax to *(edi+4) - # . line->read = line->write - 8b/copy 0/mod/indirect 6/rm32/esi . . . 0/r32/eax . . # copy *esi to eax - 89/copy 1/mod/*+disp8 6/rm32/esi . . . 0/r32/eax 4/disp8 . # copy eax to *(esi+4) - # . return - eb/jump $next-word:end/disp8 + # var write/ecx: int = line->write + 8b/copy 0/mod/indirect 6/rm32/esi . . . 1/r32/ecx . . # copy *esi to ecx +$next-word:comment-loop: + # if (line->read >= line->write) break + 39/compare 1/mod/*+disp8 6/rm32/esi . . . 1/r32/ecx 4/disp8 . # compare *(esi+4) with ecx + 0f 8d/jump-if->= $next-word:comment-break/disp32 + # ++line->read + ff 0/subop/increment 1/mod/*+disp8 6/rm32/esi . . . . 4/disp8 . # increment *(esi+4) + # ++out->end + ff 0/subop/increment 1/mod/*+disp8 7/rm32/edi . . . . 4/disp8 . # increment *(edi+4) + # if (*out->end == newline) break + 8b/copy 1/mod/*+disp8 7/rm32/edi . . . 0/r32/eax 4/disp8 . # copy *(edi+4) to eax + 8a/copy-byte 0/mod/indirect 0/rm32/eax . . . 0/r32/AL . . # copy byte at *eax to AL + 25/and-eax-with 0xff/imm32 + 3d/compare-eax-and 0xa/imm32/newline + 0f 84/jump-if-= $next-word:comment-break/disp32 + # loop + e9/jump $next-word:comment-loop/disp32 +$next-word:comment-break: + # return + e9/jump $next-word:end/disp32 $next-word:regular-word: # otherwise skip-chars-not-matching-whitespace(line) # including trailing newline # . . push args diff --git a/400.mu b/400.mu index ec050342..6640a524 100644 --- a/400.mu +++ b/400.mu @@ -96,8 +96,8 @@ sig to-decimal-digit in: grapheme -> _/eax: int # bad name alert # next-word really tokenizes # next-raw-word really reads whitespace-separated words -sig next-word line: (addr stream byte), out: (addr slice) # skips '#' comments -sig next-raw-word line: (addr stream byte), out: (addr slice) # does not skip '#' comments +sig next-word line: (addr stream byte), out: (addr slice) # merges '#' comments into a single word +sig next-raw-word line: (addr stream byte), out: (addr slice) # does not merge '#' comments sig skip-chars-matching in: (addr stream byte), delimiter: byte sig skip-chars-matching-whitespace in: (addr stream byte) sig skip-chars-not-matching in: (addr stream byte), delimiter: byte diff --git a/511image.mu b/511image.mu index 94787d66..7d4127f4 100644 --- a/511image.mu +++ b/511image.mu @@ -18,7 +18,7 @@ fn initialize-image _self: (addr image), in: (addr stream byte) { var self/esi: (addr image) <- copy _self var mode-storage: slice var mode/ecx: (addr slice) <- address mode-storage - next-word in, mode + next-word-skipping-comments in, mode { var P1?/eax: boolean <- slice-equal? mode, "P1" compare P1?, 0/false @@ -90,10 +90,10 @@ fn initialize-image-from-pbm _self: (addr image), in: (addr stream byte) { var curr-word-storage: slice var curr-word/ecx: (addr slice) <- address curr-word-storage # load width, height - next-word in, curr-word + next-word-skipping-comments in, curr-word var tmp/eax: int <- parse-decimal-int-from-slice curr-word var width/edx: int <- copy tmp - next-word in, curr-word + next-word-skipping-comments in, curr-word tmp <- parse-decimal-int-from-slice curr-word var height/ebx: int <- copy tmp # save width, height @@ -112,7 +112,7 @@ fn initialize-image-from-pbm _self: (addr image), in: (addr stream byte) { { compare i, capacity break-if->= - next-word in, curr-word + next-word-skipping-comments in, curr-word var src/eax: int <- parse-decimal-int-from-slice curr-word { var dest/ecx: (addr byte) <- index data, i @@ -202,14 +202,14 @@ fn initialize-image-from-pgm _self: (addr image), in: (addr stream byte) { var curr-word-storage: slice var curr-word/ecx: (addr slice) <- address curr-word-storage # load width, height - next-word in, curr-word + next-word-skipping-comments in, curr-word var tmp/eax: int <- parse-decimal-int-from-slice curr-word var width/edx: int <- copy tmp - next-word in, curr-word + next-word-skipping-comments in, curr-word tmp <- parse-decimal-int-from-slice curr-word var height/ebx: int <- copy tmp # check and save color levels - next-word in, curr-word + next-word-skipping-comments in, curr-word { tmp <- parse-decimal-int-from-slice curr-word compare tmp, 0xff @@ -234,7 +234,7 @@ fn initialize-image-from-pgm _self: (addr image), in: (addr stream byte) { { compare i, capacity break-if->= - next-word in, curr-word + next-word-skipping-comments in, curr-word var src/eax: int <- parse-decimal-int-from-slice curr-word { var dest/ecx: (addr byte) <- index data, i @@ -688,13 +688,13 @@ fn initialize-image-from-ppm _self: (addr image), in: (addr stream byte) { var curr-word-storage: slice var curr-word/ecx: (addr slice) <- address curr-word-storage # load width, height - next-word in, curr-word + next-word-skipping-comments in, curr-word var tmp/eax: int <- parse-decimal-int-from-slice curr-word var width/edx: int <- copy tmp - next-word in, curr-word + next-word-skipping-comments in, curr-word tmp <- parse-decimal-int-from-slice curr-word var height/ebx: int <- copy tmp - next-word in, curr-word + next-word-skipping-comments in, curr-word # check color levels { tmp <- parse-decimal-int-from-slice curr-word @@ -725,7 +725,7 @@ fn initialize-image-from-ppm _self: (addr image), in: (addr stream byte) { { compare i, capacity break-if->= - next-word in, curr-word + next-word-skipping-comments in, curr-word var src/eax: int <- parse-decimal-int-from-slice curr-word { var dest/ecx: (addr byte) <- index data, i @@ -1111,3 +1111,10 @@ fn scale-image-height _img: (addr image), width: int -> _/ebx: int { var result/ebx: int <- convert result-f return result } + +fn next-word-skipping-comments line: (addr stream byte), out: (addr slice) { + next-word line, out + var retry?/eax: boolean <- slice-starts-with? out, "#" + compare retry?, 0/false + loop-if-!= +} diff --git a/linux/127next-word.subx b/linux/127next-word.subx index 5af326d4..17f9e88c 100644 --- a/linux/127next-word.subx +++ b/linux/127next-word.subx @@ -38,29 +38,44 @@ $next-word:check0: # . return out c7 0/subop/copy 0/mod/direct 7/rm32/edi . . . . . 0/imm32 # copy to *edi c7 0/subop/copy 1/mod/*+disp8 7/rm32/edi . . . . 4/disp8 0/imm32 # copy to *(edi+4) - eb/jump $next-word:end/disp8 + e9/jump $next-word:end/disp32 $next-word:check-for-comment: # out->start = &line->data[line->read] 8b/copy 1/mod/*+disp8 6/rm32/esi . . . 1/r32/ecx 4/disp8 . # copy *(esi+4) to ecx 8d/copy-address 1/mod/*+disp8 4/rm32/sib 6/base/esi 1/index/ecx . 0/r32/eax 0xc/disp8 . # copy esi+ecx+12 to eax 89/copy 0/mod/indirect 7/rm32/edi . . . 0/r32/eax . . # copy eax to *edi - # if (line->data[line->read] == '#') out->end = &line->data[line->write]), skip rest of stream and return + # if (line->data[line->read] == '#') return rest of line # . eax = line->data[line->read] 31/xor 3/mod/direct 0/rm32/eax . . . 0/r32/eax . . # clear eax 8a/copy-byte 1/mod/*+disp8 4/rm32/sib 6/base/esi 1/index/ecx . 0/r32/AL 0xc/disp8 . # copy byte at *(esi+ecx+12) to AL # . compare 3d/compare-eax-and 0x23/imm32/pound - 75/jump-if-!= $next-word:regular-word/disp8 + 0f 85/jump-if-!= $next-word:regular-word/disp32 $next-word:comment: - # . out->end = &line->data[line->write] - 8b/copy 0/mod/indirect 6/rm32/esi . . . 0/r32/eax . . # copy *esi to eax - 8d/copy-address 1/mod/*+disp8 4/rm32/sib 6/base/esi 0/index/eax . 0/r32/eax 0xc/disp8 . # copy esi+eax+12 to eax + # out->end = out->start + 8d/copy-address 1/mod/*+disp8 4/rm32/sib 6/base/esi 1/index/ecx . 0/r32/eax 0xc/disp8 . # copy esi+ecx+12 to eax 89/copy 1/mod/*+disp8 7/rm32/edi . . . 0/r32/eax 4/disp8 . # copy eax to *(edi+4) - # . line->read = line->write - 8b/copy 0/mod/indirect 6/rm32/esi . . . 0/r32/eax . . # copy *esi to eax - 89/copy 1/mod/*+disp8 6/rm32/esi . . . 0/r32/eax 4/disp8 . # copy eax to *(esi+4) - # . return - eb/jump $next-word:end/disp8 + # var write/ecx: int = line->write + 8b/copy 0/mod/indirect 6/rm32/esi . . . 1/r32/ecx . . # copy *esi to ecx +$next-word:comment-loop: + # if (line->read >= line->write) break + 39/compare 1/mod/*+disp8 6/rm32/esi . . . 1/r32/ecx 4/disp8 . # compare *(esi+4) with ecx + 0f 8d/jump-if->= $next-word:comment-break/disp32 + # ++line->read + ff 0/subop/increment 1/mod/*+disp8 6/rm32/esi . . . . 4/disp8 . # increment *(esi+4) + # ++out->end + ff 0/subop/increment 1/mod/*+disp8 7/rm32/edi . . . . 4/disp8 . # increment *(edi+4) + # if (*out->end == newline) break + 8b/copy 1/mod/*+disp8 7/rm32/edi . . . 0/r32/eax 4/disp8 . # copy *(edi+4) to eax + 8a/copy-byte 0/mod/indirect 0/rm32/eax . . . 0/r32/AL . . # copy byte at *eax to AL + 25/and-eax-with 0xff/imm32 + 3d/compare-eax-and 0xa/imm32/newline + 0f 84/jump-if-= $next-word:comment-break/disp32 + # loop + e9/jump $next-word:comment-loop/disp32 +$next-word:comment-break: + # return + e9/jump $next-word:end/disp32 $next-word:regular-word: # otherwise skip-chars-not-matching-whitespace(line) # including trailing newline # . . push args diff --git a/linux/assort b/linux/assort index 9841efba..f4f73847 100755 Binary files a/linux/assort and b/linux/assort differ diff --git a/linux/braces b/linux/braces index 573a66be..0930284c 100755 Binary files a/linux/braces and b/linux/braces differ diff --git a/linux/calls b/linux/calls index f92f9d24..e2c8b5bb 100755 Binary files a/linux/calls and b/linux/calls differ diff --git a/linux/dquotes b/linux/dquotes index d523ca81..0339ae16 100755 Binary files a/linux/dquotes and b/linux/dquotes differ diff --git a/linux/hex b/linux/hex index 70293365..e6c63c83 100755 Binary files a/linux/hex and b/linux/hex differ diff --git a/linux/labels_baremetal b/linux/labels_baremetal index de2749d8..12a545ca 100755 Binary files a/linux/labels_baremetal and b/linux/labels_baremetal differ diff --git a/linux/mu b/linux/mu index cd11865e..ecf884a2 100755 Binary files a/linux/mu and b/linux/mu differ diff --git a/linux/pack b/linux/pack index 87671827..32db6ad3 100755 Binary files a/linux/pack and b/linux/pack differ diff --git a/linux/sigils b/linux/sigils index 64f78222..3bcd2d9c 100755 Binary files a/linux/sigils and b/linux/sigils differ diff --git a/linux/survey_baremetal b/linux/survey_baremetal index 46de25b7..b5429495 100755 Binary files a/linux/survey_baremetal and b/linux/survey_baremetal differ diff --git a/linux/survey_elf b/linux/survey_elf index bd1c816c..9d1a715c 100755 Binary files a/linux/survey_elf and b/linux/survey_elf differ diff --git a/linux/tests b/linux/tests index 65c743fd..04a112ef 100755 Binary files a/linux/tests and b/linux/tests differ -- cgit 1.4.1-2-gfad0 95 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252