diff options
author | Kartik Agaram <vc@akkartik.com> | 2020-02-16 01:42:38 -0800 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2020-02-16 01:44:29 -0800 |
commit | d9ff5c3fb8964b2379aa9cd53cfb78da134119db (patch) | |
tree | 63272b4f79bc03f82f684492a261027c72123d90 | |
parent | deacf2c94e7b6a549c5b302e18e5f7d1a68ec2a8 (diff) | |
download | mu-d9ff5c3fb8964b2379aa9cd53cfb78da134119db.tar.gz |
6009 - significantly cleaner lexing
This cleans up a bunch of little warts that had historically accumulated because of my bull-headedness in not designing a grammar up front. Let's see if the lack of a grammar comes up again. We now require that there be no space in variable declarations between the name and the colon separating it from its type.
-rw-r--r-- | apps/factorial.mu | 5 |
-rwxr-xr-x | apps/mu | bin | 133605 -> 133440 bytes |
-rw-r--r-- | apps/mu.subx | 164 |
3 files changed, 34 insertions, 135 deletions
diff --git a/apps/factorial.mu b/apps/factorial.mu index 7d9015e2..a94be482 100644 --- a/apps/factorial.mu +++ b/apps/factorial.mu @@ -1,14 +1,13 @@ fn main -> exit-status/ebx: int { #? run-tests #? result <- copy 0 - test-factorial # abc var tmp/eax: int <- factorial 5 exit-status <- copy tmp } fn factorial n: int -> result/eax: int { compare n 1 - { # foo + { break-if-> result <- copy 1 } @@ -16,7 +15,7 @@ fn factorial n: int -> result/eax: int { break-if-<= var tmp/ecx: int <- copy n tmp <- decrement - result <- factorial tmp # test comment + result <- factorial tmp result <- multiply n } } diff --git a/apps/mu b/apps/mu index a468f9d5..c11ce01e 100755 --- a/apps/mu +++ b/apps/mu Binary files differdiff --git a/apps/mu.subx b/apps/mu.subx index 778ef264..8765e5f9 100644 --- a/apps/mu.subx +++ b/apps/mu.subx @@ -507,7 +507,7 @@ test-convert-function-with-arg: (clear-stream _test-output-stream) (clear-stream $_test-output-buffered-file->buffer) # - (write _test-input-stream "fn foo n : int {\n") + (write _test-input-stream "fn foo n: int {\n") (write _test-input-stream "}\n") # convert (convert-mu _test-input-buffered-file _test-output-buffered-file) @@ -543,7 +543,7 @@ test-convert-function-with-arg-and-body: (clear-stream $_test-output-buffered-file->buffer) c7 0/subop/copy *Next-block-index 1/imm32 # - (write _test-input-stream "fn foo n : int {\n") + (write _test-input-stream "fn foo n: int {\n") (write _test-input-stream " increment n\n") (write _test-input-stream "}\n") # convert @@ -1877,7 +1877,7 @@ test-function-header-with-arg: 89/<- %ebp 4/r32/esp # setup (clear-stream _test-input-stream) - (write _test-input-stream "foo n : int {\n") + (write _test-input-stream "foo n: int {\n") # var result/ecx: function 2b/subtract-> *Function-size 4/r32/esp 89/<- %ecx 4/r32/esp @@ -2025,26 +2025,22 @@ test-function-with-multiple-args-and-outputs: # format for variables with types # x: int -# x: int # x: int, -# ignores at most one trailing colon or comma +# x/eax: 
int +# x/eax: int, +# ignores at most one trailing comma +# WARNING: modifies name parse-var-with-type: # name: (addr slice), first-line: (addr stream byte) -> result/eax: (handle var) # pseudocode: # var v: (handle var) = allocate(Heap, Var-size) # var s: slice + # if (!slice-ends-with(name, ":")) + # abort + # --name->end to skip ':' # next-token-from-slice(name->start, name->end, '/', s) - # var end: (addr byte) = s->end - # if (slice-ends-with(s, ":")) - # decrement s->end - # if (slice-ends-with(s, ",")) - # decrement s->end # v->name = slice-to-string(s) # ## register - # next-token-from-slice(end, name->end, '/', s) - # if (slice-ends-with(s, ":")) - # decrement s->end - # if (slice-ends-with(s, ",")) - # decrement s->end + # next-token-from-slice(s->end, name->end, '/', s) # if (!slice-empty?(s)) # v->register = slice-to-string(s) # ## type @@ -2061,12 +2057,21 @@ parse-var-with-type: # name: (addr slice), first-line: (addr stream byte) -> re 53/push-ebx 56/push-esi 57/push-edi + # esi = name + 8b/-> *(ebp+8) 6/r32/esi + # if (!slice-ends-with?(name, ":")) abort + 8b/-> *(esi+4) 1/r32/ecx # Slice-end + 49/decrement-ecx + 8a/copy-byte *ecx 1/r32/CL + 81 4/subop/and %ecx 0xff/imm32 + 81 7/subop/compare %ecx 0x3a/imm32/colon + 0f 85/jump-if-!= $parse-var-with-type:abort/disp32 + # --name->end to skip ':' + ff 1/subop/decrement *(esi+4) # var result/edi: (handle var) = allocate(Heap, Var-size) (allocate Heap *Var-size) # => eax (zero-out %eax *Var-size) 89/<- %edi 0/r32/eax - # esi = name - 8b/-> *(ebp+8) 6/r32/esi # var s/ecx: slice 68/push 0/imm32/end 68/push 0/imm32/start @@ -2076,52 +2081,12 @@ $parse-var-with-type:save-name: (next-token-from-slice *esi *(esi+4) 0x2f %ecx) # Slice-start, Slice-end, '/' # . end/edx = s->end 8b/-> *(ecx+4) 2/r32/edx - # . 
if s ends with ':', decrement s->end - { - 8b/-> *(ecx+4) 0/r32/eax - 48/decrement-eax - 8a/copy-byte *eax 3/r32/BL - 81 4/subop/and %ebx 0xff/imm32 - 81 7/subop/compare %ebx 0x3a/imm32/colon - 75/jump-if-!= break/disp8 - 89/<- *(ecx+4) 0/r32/eax - } - # . if s ends with ',', decrement s->end - { - 8b/-> *(ecx+4) 0/r32/eax - 48/decrement-eax - 8a/copy-byte *eax 3/r32/BL - 81 4/subop/and %ebx 0xff/imm32 - 81 7/subop/compare %ebx 0x2c/imm32/comma - 75/jump-if-!= break/disp8 - 89/<- *(ecx+4) 0/r32/eax - } $parse-var-with-type:write-name: (slice-to-string Heap %ecx) # => eax 89/<- *edi 0/r32/eax # Var-name # save v->register $parse-var-with-type:save-register: (next-token-from-slice %edx *(esi+4) 0x2f %ecx) # end, name->end, '/' - # . if s ends with ':', decrement s->end - { - 8b/-> *(ecx+4) 0/r32/eax - 48/decrement-eax - 8a/copy-byte *eax 3/r32/BL - 81 4/subop/and %ebx 0xff/imm32 - 81 7/subop/compare %ebx 0x3a/imm32/colon - 75/jump-if-!= break/disp8 - 89/<- *(ecx+4) 0/r32/eax - } - # . if s ends with ',', decrement s->end - { - 8b/-> *(ecx+4) 0/r32/eax - 48/decrement-eax - 8a/copy-byte *eax 3/r32/BL - 81 4/subop/and %ebx 0xff/imm32 - 81 7/subop/compare %ebx 0x2c/imm32/comma - 75/jump-if-!= break/disp8 - 89/<- *(ecx+4) 0/r32/eax - } # if (!slice-empty?(s)) v->register = slice-to-string(s) { $parse-var-with-type:write-register: @@ -2153,7 +2118,7 @@ $parse-var-with-type:end: c3/return $parse-var-with-type:abort: - # error("function header not in form 'fn <name> {'") + # error("var should have form 'name: type' in '" line "'\n") (write-buffered Stderr "var should have form 'name: type' in '") (flush Stderr) (rewind-stream *(ebp+0xc)) @@ -2328,9 +2293,6 @@ next-mu-token: # in: (addr stream byte), out: (addr slice) # return # out->start = &in->data[in->read] # var curr-byte/eax: byte = in->data[in->read] - # if curr->byte == ':' # comment token - # ++in->read - # goto start # if curr->byte == ',' # comment token # ++in->read # goto start @@ -2360,25 +2322,11 @@ 
next-mu-token: # in: (addr stream byte), out: (addr slice) # break # if curr-byte == ')' # break - # if curr-byte == ':' - # break # if curr-byte == ',' # break # ++in->read # done: # out->end = &in->data[in->read] - # # hack: skip a few trailing delimiters, because we don't always use - # # this correct tokenizer in later tokens - # while true - # if in->read >= in->write - # break - # curr-byte = in->data[in->read] - # if curr-byte == ':' - # ++in->read - # else if curr-byte == ',' - # ++in->read - # else - # break # # . prologue 55/push-ebp @@ -2410,16 +2358,6 @@ $next-mu-token:check0: 31/xor %eax 0/r32/eax 8a/copy-byte *(esi+ecx+0xc) 0/r32/AL { -$next-mu-token:check-for-colon: - # if (curr-byte != ':') break - 3d/compare-eax-and 0x3a/imm32/colon - 75/jump-if-!= break/disp8 - # ++in->read - ff 0/subop/increment *(esi+4) - # restart - e9/jump $next-mu-token:start/disp32 - } - { $next-mu-token:check-for-comma: # if (curr-byte != ',') break 3d/compare-eax-and 0x2c/imm32/comma @@ -2492,9 +2430,6 @@ $next-mu-token:regular-word-without-metadata: # if (c == ')') break 3d/compare-eax-and 0x29/imm32/close-paren 0f 84/jump-if-= break/disp32 - # if (c == ':') break - 3d/compare-eax-and 0x3a/imm32/colon - 0f 84/jump-if-= break/disp32 # if (c == ',') break 3d/compare-eax-and 0x2c/imm32/comma 0f 84/jump-if-= break/disp32 @@ -2508,37 +2443,6 @@ $next-mu-token:done: 8b/-> *(esi+4) 1/r32/ecx 8d/copy-address *(esi+ecx+0xc) 0/r32/eax 89/<- *(edi+4) 0/r32/eax - { -$next-mu-token:skip-trailing-delimiters: - # if (in->read >= in->write) break - # . ecx = in->read - 8b/-> *(esi+4) 1/r32/ecx - # . 
if (ecx >= in->write) break - 3b/compare *esi 1/r32/ecx - 7d/jump-if->= break/disp8 - # var c/eax: byte = in->data[in->read] - 31/xor %eax 0/r32/eax - 8a/copy-byte *(esi+ecx+0xc) 0/r32/AL - # if (c == ':') ++in->read and loop - { - 3d/compare-eax-and 0x3a/imm32/colon - 75/jump-if-!= break/disp8 - # ++in->read - ff 0/subop/increment *(esi+4) - # - eb/jump $next-mu-token:skip-trailing-delimiters/disp8 - } - # if (c == ',') ++in->read and loop - { - 3d/compare-eax-and 0x2c/imm32/comma - 75/jump-if-!= break/disp8 - # ++in->read - ff 0/subop/increment *(esi+4) - # - eb/jump $next-mu-token:skip-trailing-delimiters/disp8 - } - # else break - } $next-mu-token:end: # . restore registers 5f/pop-to-edi @@ -2668,8 +2572,8 @@ test-parse-var-with-type-and-register: # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp - # (eax..ecx) = "x/eax" - b8/copy-to-eax "x/eax"/imm32 + # (eax..ecx) = "x/eax:" + b8/copy-to-eax "x/eax:"/imm32 8b/-> *eax 1/r32/ecx 8d/copy-address *(eax+ecx+4) 1/r32/ecx 05/add-to-eax 4/imm32 @@ -2677,9 +2581,9 @@ test-parse-var-with-type-and-register: 51/push-ecx 50/push-eax 89/<- %ecx 4/r32/esp - # _test-input-stream contains ": int" + # _test-input-stream contains "int" (clear-stream _test-input-stream) - (write _test-input-stream ": int") + (write _test-input-stream "int") # (parse-var-with-type %ecx _test-input-stream) 8b/-> *eax 2/r32/edx # Var-name @@ -3147,7 +3051,7 @@ parse-mu-block: # in: (addr buffered-file), vars: (addr stack (handle var)), fn # clear-stream(line) # read-line-buffered(in, line) # if (line->write == 0) break # end of file - # word-slice = next-word(line) + # word-slice = next-mu-token(line) # if slice-empty?(word-slice) # end of line # continue # else if slice-starts-with?(word-slice, "#") @@ -3215,12 +3119,8 @@ $parse-mu-block:line-loop: # if (line->write == 0) break 81 7/subop/compare *ecx 0/imm32 0f 84/jump-if-= break/disp32 - # word-slice = next-word(line) - (next-word %ecx %edx) # We can't use next-mu-token here because of an ambiguity 
that has crept into our grammar. - # Colons are used to separate variable names from types. For example, `n: int`. Here the colon needs to be a separate word. - # Colons are also used to designate a label. Here the colon needs to be the final letter of a word. - # Maybe I should just disallow spaces before colons in all situations. - # Or be consistent and allow allow space before label name and colon. + # word-slice = next-mu-token(line) + (next-mu-token %ecx %edx) #? (write-buffered Stderr "word: ") #? (write-slice-buffered Stderr %edx) #? (write-buffered Stderr Newline) @@ -3363,7 +3263,7 @@ new-block-name: # fn: (handle function) -> result/eax: (handle var) ff 6/subop/push %eax ff 6/subop/push %edx 89/<- %eax 4/r32/esp - # var final-name/edx : (addr array byte) = slice-to-string(s) + # var final-name/edx: (addr array byte) = slice-to-string(s) (slice-to-string Heap %eax) # => eax 89/<- %edx 0/r32/eax # set result->var @@ -5055,7 +4955,7 @@ $clean-up-blocks:reclaim-loop: # if (vars->top <= 0) break 81 7/subop/compare *esi 0/imm32 # Stack-top 7e/jump-if-<= break/disp8 - # var v/eax : (handle var) = top(vars) + # var v/eax: (handle var) = top(vars) (top %esi) # => eax # if (v->block-depth < until-block-depth) break 39/compare *(eax+8) 1/r32/ecx # Var-block-depth |