about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorKartik K. Agaram <vc@akkartik.com>2015-02-08 13:26:16 -0800
committerKartik K. Agaram <vc@akkartik.com>2015-02-08 13:26:16 -0800
commitd1dc633bd2f436af34b9d974f3dbe466bf6b0b87 (patch)
tree2c437648e6bca4811382ba7881e666f492d50b81
parent465c5c06e7657fc8b1ef0e383d43e171db6e02de (diff)
downloadmu-d1dc633bd2f436af34b9d974f3dbe466bf6b0b87.tar.gz
720 - substring matching and searching
-rw-r--r--mu.arc67
-rw-r--r--mu.arc.t169
2 files changed, 231 insertions, 5 deletions
diff --git a/mu.arc b/mu.arc
index 71e2f251..73de6714 100644
--- a/mu.arc
+++ b/mu.arc
@@ -2109,21 +2109,78 @@
 
 (init-fn find-next  ; string, character, index -> next index
   (default-space:space-address <- new space:literal 30:literal)
-  (s:string-address <- next-input)
-  (needle:character <- next-input)
+  (text:string-address <- next-input)
+  (pattern:character <- next-input)
   (idx:integer <- next-input)
-  (len:integer <- length s:string-address/deref)
+  (len:integer <- length text:string-address/deref)
   { begin
     (eof?:boolean <- greater-or-equal idx:integer len:integer)
     (break-if eof?:boolean)
-    (curr:byte <- index s:string-address/deref idx:integer)
-    (found?:boolean <- equal curr:byte needle:character)
+    (curr:byte <- index text:string-address/deref idx:integer)
+    (found?:boolean <- equal curr:byte pattern:character)
     (break-if found?:boolean)
     (idx:integer <- add idx:integer 1:literal)
     (loop)
   }
   (reply idx:integer))
 
+(init-fn find-substring/variant:find-next
+  (default-space:space-address <- new space:literal 30:literal)
+  ; fairly dumb algorithm; used for parsing code and traces
+  (text:string-address <- next-input)
+  (pattern:string-address <- next-input)
+  (idx:integer <- next-input)
+  (first:character <- index pattern:string-address/deref 0:literal)
+  ; repeatedly check for match at current idx
+  (len:integer <- length text:string-address/deref)
+  { begin
+    ; does some unnecessary work checking for substrings even when there isn't enough of text left
+    (eof?:boolean <- greater-or-equal idx:integer len:integer)
+    (break-if eof?:boolean)
+    (found?:boolean <- match-at text:string-address pattern:string-address idx:integer)
+    (break-if found?:boolean)
+    (idx:integer <- add idx:integer 1:literal)
+    ; optimization: skip past indices that definitely won't match
+    (idx:integer <- find-next text:string-address first:character idx:integer)
+    (loop)
+  }
+  (reply idx:integer)
+)
+
+(init-fn match-at
+  (default-space:space-address <- new space:literal 30:literal)
+  ; fairly dumb algorithm; used for parsing code and traces
+  (text:string-address <- next-input)
+  (pattern:string-address <- next-input)
+  (idx:integer <- next-input)
+  (pattern-len:integer <- length pattern:string-address/deref)
+  ; check that there's space left for the pattern
+  { begin
+    (x:integer <- length text:string-address/deref)
+    (x:integer <- subtract x:integer pattern-len:integer)
+    (enough-room?:boolean <- lesser-or-equal idx:integer x:integer)
+    (break-if enough-room?:boolean)
+    (reply nil:literal)
+  }
+  ; check each character of pattern
+  (pattern-idx:integer <- copy 0:literal)
+  { begin
+    (done?:boolean <- greater-or-equal pattern-idx:integer pattern-len:integer)
+    (break-if done?:boolean)
+    (c:character <- index text:string-address/deref idx:integer)
+    (exp:character <- index pattern:string-address/deref pattern-idx:integer)
+    { begin
+      (match?:boolean <- equal c:character exp:character)
+      (break-if match?:boolean)
+      (reply nil:literal)
+    }
+    (idx:integer <- add idx:integer 1:literal)
+    (pattern-idx:integer <- add pattern-idx:integer 1:literal)
+    (loop)
+  }
+  (reply t:literal)
+)
+
 (init-fn split  ; string, character -> string-address-array-address
   (default-space:space-address <- new space:literal 30:literal)
   (s:string-address <- next-input)
diff --git a/mu.arc.t b/mu.arc.t
index 68dfbd73..28b7fbe4 100644
--- a/mu.arc.t
+++ b/mu.arc.t
@@ -4267,6 +4267,175 @@
   (prn "F - 'find-next' finds second of multiple options"))
 
 (reset)
+(new-trace "match-at")
+(add-code
+  '((function main [
+      (1:string-address <- new "abc")
+      (2:string-address <- new "ab")
+      (3:boolean <- match-at 1:string-address 2:string-address 0:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 t)
+  (prn "F - 'match-at' matches substring at given index"))
+
+(reset)
+(new-trace "match-at-reflexive")
+(add-code
+  '((function main [
+      (1:string-address <- new "abc")
+      (3:boolean <- match-at 1:string-address 1:string-address 0:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 t)
+  (prn "F - 'match-at' always matches a string at itself at index 0"))
+
+(reset)
+(new-trace "match-at-outside-bounds")
+(add-code
+  '((function main [
+      (1:string-address <- new "abc")
+      (2:string-address <- new "a")
+      (3:boolean <- match-at 1:string-address 2:string-address 4:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 nil)
+  (prn "F - 'match-at' always fails to match outside the bounds of the text"))
+
+(reset)
+(new-trace "match-at-empty-pattern")
+(add-code
+  '((function main [
+      (1:string-address <- new "abc")
+      (2:string-address <- new "")
+      (3:boolean <- match-at 1:string-address 2:string-address 0:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 t)
+  (prn "F - 'match-at' always matches empty pattern"))
+
+(reset)
+(new-trace "match-at-empty-pattern-outside-bounds")
+(add-code
+  '((function main [
+      (1:string-address <- new "abc")
+      (2:string-address <- new "")
+      (3:boolean <- match-at 1:string-address 2:string-address 4:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 nil)
+  (prn "F - 'match-at' matches empty pattern -- unless index is out of bounds"))
+
+(reset)
+(new-trace "match-at-empty-text")
+(add-code
+  '((function main [
+      (1:string-address <- new "")
+      (2:string-address <- new "abc")
+      (3:boolean <- match-at 1:string-address 2:string-address 0:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 nil)
+  (prn "F - 'match-at' never matches empty text"))
+
+(reset)
+(new-trace "match-at-empty-against-empty")
+(add-code
+  '((function main [
+      (1:string-address <- new "")
+      (3:boolean <- match-at 1:string-address 1:string-address 0:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 t)
+  (prn "F - 'match-at' never matches empty text -- unless pattern is also empty"))
+
+(reset)
+(new-trace "match-at-inside-bounds")
+(add-code
+  '((function main [
+      (1:string-address <- new "abc")
+      (2:string-address <- new "bc")
+      (3:boolean <- match-at 1:string-address 2:string-address 1:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 t)
+  (prn "F - 'match-at' matches inner substring"))
+
+(reset)
+(new-trace "match-at-inside-bounds-2")
+(add-code
+  '((function main [
+      (1:string-address <- new "abc")
+      (2:string-address <- new "bc")
+      (3:boolean <- match-at 1:string-address 2:string-address 0:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 nil)
+  (prn "F - 'match-at' matches inner substring - 2"))
+
+(reset)
+(new-trace "find-substring")
+(add-code
+  '((function main [
+      (1:string-address <- new "abc")
+      (2:string-address <- new "bc")
+      (3:integer <- find-substring 1:string-address 2:string-address 0:literal)
+     ])))
+(run 'main)
+;? (prn memory*.3) ;? 1
+(when (~is memory*.3 1)
+  (prn "F - 'find-substring' returns index of match"))
+
+(reset)
+(new-trace "find-substring-2")
+(add-code
+  '((function main [
+      (1:string-address <- new "abcd")
+      (2:string-address <- new "bc")
+      (3:integer <- find-substring 1:string-address 2:string-address 1:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 1)
+  (prn "F - 'find-substring' returns provided index if it matches"))
+
+(reset)
+(new-trace "find-substring-no-match")
+(add-code
+  '((function main [
+      (1:string-address <- new "abc")
+      (2:string-address <- new "bd")
+      (3:integer <- find-substring 1:string-address 2:string-address 0:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 3)
+  (prn "F - 'find-substring' returns out-of-bounds index on no-match"))
+
+(reset)
+(new-trace "find-substring-suffix-match")
+(add-code
+  '((function main [
+      (1:string-address <- new "abcd")
+      (2:string-address <- new "cd")
+      (3:integer <- find-substring 1:string-address 2:string-address 0:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 2)
+  (prn "F - 'find-substring' returns provided index if it matches"))
+
+(reset)
+(new-trace "find-substring-suffix-match-2")
+(add-code
+  '((function main [
+      (1:string-address <- new "abcd")
+      (2:string-address <- new "cde")
+      (3:integer <- find-substring 1:string-address 2:string-address 0:literal)
+     ])))
+(run 'main)
+(when (~is memory*.3 4)
+  (prn "F - 'find-substring' returns provided index if it matches"))
+
+;? (quit) ;? 1
+
+(reset)
 (new-trace "string-split")
 (add-code
   '((function main [