diff options
-rw-r--r-- | mu.arc | 67 | ||||
-rw-r--r-- | mu.arc.t | 169 |
2 files changed, 231 insertions, 5 deletions
diff --git a/mu.arc b/mu.arc index 71e2f251..73de6714 100644 --- a/mu.arc +++ b/mu.arc @@ -2109,21 +2109,78 @@ (init-fn find-next ; string, character, index -> next index (default-space:space-address <- new space:literal 30:literal) - (s:string-address <- next-input) - (needle:character <- next-input) + (text:string-address <- next-input) + (pattern:character <- next-input) (idx:integer <- next-input) - (len:integer <- length s:string-address/deref) + (len:integer <- length text:string-address/deref) { begin (eof?:boolean <- greater-or-equal idx:integer len:integer) (break-if eof?:boolean) - (curr:byte <- index s:string-address/deref idx:integer) - (found?:boolean <- equal curr:byte needle:character) + (curr:byte <- index text:string-address/deref idx:integer) + (found?:boolean <- equal curr:byte pattern:character) (break-if found?:boolean) (idx:integer <- add idx:integer 1:literal) (loop) } (reply idx:integer)) +(init-fn find-substring/variant:find-next + (default-space:space-address <- new space:literal 30:literal) + ; fairly dumb algorithm; used for parsing code and traces + (text:string-address <- next-input) + (pattern:string-address <- next-input) + (idx:integer <- next-input) + (first:character <- index pattern:string-address/deref 0:literal) + ; repeatedly check for match at current idx + (len:integer <- length text:string-address/deref) + { begin + ; does some unnecessary work checking for substrings even when there isn't enough of text left + (eof?:boolean <- greater-or-equal idx:integer len:integer) + (break-if eof?:boolean) + (found?:boolean <- match-at text:string-address pattern:string-address idx:integer) + (break-if found?:boolean) + (idx:integer <- add idx:integer 1:literal) + ; optimization: skip past indices that definitely won't match + (idx:integer <- find-next text:string-address first:character idx:integer) + (loop) + } + (reply idx:integer) +) + +(init-fn match-at + (default-space:space-address <- new space:literal 30:literal) + ; fairly dumb algorithm; used for parsing code and traces + (text:string-address <- next-input) + (pattern:string-address <- next-input) + (idx:integer <- next-input) + (pattern-len:integer <- length pattern:string-address/deref) + ; check that there's space left for the pattern + { begin + (x:integer <- length text:string-address/deref) + (x:integer <- subtract x:integer pattern-len:integer) + (enough-room?:boolean <- lesser-or-equal idx:integer x:integer) + (break-if enough-room?:boolean) + (reply nil:literal) + } + ; check each character of pattern + (pattern-idx:integer <- copy 0:literal) + { begin + (done?:boolean <- greater-or-equal pattern-idx:integer pattern-len:integer) + (break-if done?:boolean) + (c:character <- index text:string-address/deref idx:integer) + (exp:character <- index pattern:string-address/deref pattern-idx:integer) + { begin + (match?:boolean <- equal c:character exp:character) + (break-if match?:boolean) + (reply nil:literal) + } + (idx:integer <- add idx:integer 1:literal) + (pattern-idx:integer <- add pattern-idx:integer 1:literal) + (loop) + } + (reply t:literal) +) + (init-fn split ; string, character -> string-address-array-address (default-space:space-address <- new space:literal 30:literal) (s:string-address <- next-input) diff --git a/mu.arc.t b/mu.arc.t index 68dfbd73..28b7fbe4 100644 --- a/mu.arc.t +++ b/mu.arc.t @@ -4267,6 +4267,175 @@ (prn "F - 'find-next' finds second of multiple options")) (reset) +(new-trace "match-at") +(add-code + '((function main [ + (1:string-address <- new "abc") + (2:string-address <- new "ab") + (3:boolean <- match-at 1:string-address 2:string-address 0:literal) + ]))) +(run 'main) +(when (~is memory*.3 t) + (prn "F - 'match-at' matches substring at given index")) + +(reset) +(new-trace "match-at-reflexive") +(add-code + '((function main [ + (1:string-address <- new "abc") + (3:boolean <- match-at 1:string-address 1:string-address 0:literal) + ]))) +(run 'main) +(when (~is memory*.3 t) + (prn "F - 'match-at' always matches a string at itself at index 0")) + +(reset) +(new-trace "match-at-outside-bounds") +(add-code + '((function main [ + (1:string-address <- new "abc") + (2:string-address <- new "a") + (3:boolean <- match-at 1:string-address 2:string-address 4:literal) + ]))) +(run 'main) +(when (~is memory*.3 nil) + (prn "F - 'match-at' always fails to match outside the bounds of the text")) + +(reset) +(new-trace "match-at-empty-pattern") +(add-code + '((function main [ + (1:string-address <- new "abc") + (2:string-address <- new "") + (3:boolean <- match-at 1:string-address 2:string-address 0:literal) + ]))) +(run 'main) +(when (~is memory*.3 t) + (prn "F - 'match-at' always matches empty pattern")) + +(reset) +(new-trace "match-at-empty-pattern-outside-bounds") +(add-code + '((function main [ + (1:string-address <- new "abc") + (2:string-address <- new "") + (3:boolean <- match-at 1:string-address 2:string-address 4:literal) + ]))) +(run 'main) +(when (~is memory*.3 nil) + (prn "F - 'match-at' matches empty pattern -- unless index is out of bounds")) + +(reset) +(new-trace "match-at-empty-text") +(add-code + '((function main [ + (1:string-address <- new "") + (2:string-address <- new "abc") + (3:boolean <- match-at 1:string-address 2:string-address 0:literal) + ]))) +(run 'main) +(when (~is memory*.3 nil) + (prn "F - 'match-at' never matches empty text")) + +(reset) +(new-trace "match-at-empty-against-empty") +(add-code + '((function main [ + (1:string-address <- new "") + (3:boolean <- match-at 1:string-address 1:string-address 0:literal) + ]))) +(run 'main) +(when (~is memory*.3 t) + (prn "F - 'match-at' never matches empty text -- unless pattern is also empty")) + +(reset) +(new-trace "match-at-inside-bounds") +(add-code + '((function main [ + (1:string-address <- new "abc") + (2:string-address <- new "bc") + (3:boolean <- match-at 1:string-address 2:string-address 1:literal) + ]))) +(run 'main) +(when (~is memory*.3 t) + (prn "F - 'match-at' matches inner substring")) + +(reset) +(new-trace "match-at-inside-bounds-2") +(add-code + '((function main [ + (1:string-address <- new "abc") + (2:string-address <- new "bc") + (3:boolean <- match-at 1:string-address 2:string-address 0:literal) + ]))) +(run 'main) +(when (~is memory*.3 nil) + (prn "F - 'match-at' matches inner substring - 2")) + +(reset) +(new-trace "find-substring") +(add-code + '((function main [ + (1:string-address <- new "abc") + (2:string-address <- new "bc") + (3:integer <- find-substring 1:string-address 2:string-address 0:literal) + ]))) +(run 'main) +;? (prn memory*.3) ;? 1 +(when (~is memory*.3 1) + (prn "F - 'find-substring' returns index of match")) + +(reset) +(new-trace "find-substring-2") +(add-code + '((function main [ + (1:string-address <- new "abcd") + (2:string-address <- new "bc") + (3:integer <- find-substring 1:string-address 2:string-address 1:literal) + ]))) +(run 'main) +(when (~is memory*.3 1) + (prn "F - 'find-substring' returns provided index if it matches")) + +(reset) +(new-trace "find-substring-no-match") +(add-code + '((function main [ + (1:string-address <- new "abc") + (2:string-address <- new "bd") + (3:integer <- find-substring 1:string-address 2:string-address 0:literal) + ]))) +(run 'main) +(when (~is memory*.3 3) + (prn "F - 'find-substring' returns out-of-bounds index on no-match")) + +(reset) +(new-trace "find-substring-suffix-match") +(add-code + '((function main [ + (1:string-address <- new "abcd") + (2:string-address <- new "cd") + (3:integer <- find-substring 1:string-address 2:string-address 0:literal) + ]))) +(run 'main) +(when (~is memory*.3 2) + (prn "F - 'find-substring' returns provided index if it matches")) + +(reset) +(new-trace "find-substring-suffix-match-2") +(add-code + '((function main [ + (1:string-address <- new "abcd") + (2:string-address <- new "cde") + (3:integer <- find-substring 1:string-address 2:string-address 0:literal) + ]))) +(run 'main) +(when (~is memory*.3 4) + (prn "F - 'find-substring' returns provided index if it matches")) + +;? (quit) ;? 1 + +(reset) (new-trace "string-split") (add-code '((function main [ |