about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kartik K. Agaram <vc@akkartik.com> 2014-12-19 22:18:41 -0800
committer Kartik K. Agaram <vc@akkartik.com> 2014-12-19 22:22:17 -0800
commit 4630b4aee88e312f2682eb17b98d0144e48fd7d5 (patch)
tree 032a82f0502833d52a90682518179afb6a1c64ba
parent f45ea0cb5c4109bc573da1d0b2289b451a29b068 (diff)
download mu-4630b4aee88e312f2682eb17b98d0144e48fd7d5.tar.gz
442 - string 'split'
-rw-r--r-- mu.arc 72
-rw-r--r-- mu.arc.t 86
2 files changed, 158 insertions, 0 deletions
diff --git a/mu.arc b/mu.arc
index bb7cd6f2..c3527369 100644
--- a/mu.arc
+++ b/mu.arc
@@ -97,6 +97,9 @@
               byte-address (obj  size 1  address t  elem '(byte))
               string (obj array t  elem '(byte))  ; inspired by Go
               string-address (obj size 1  address t  elem '(string))
+              string-address-address (obj size 1  address t  elem '(string-address))
+              string-address-array (obj array t  elem '(string-address))
+              string-address-array-address (obj size 1  address t  elem '(string-address-array))
               character (obj size 1)  ; int32 like a Go rune
               character-address (obj size 1  address t  elem '(character))
               ; isolating function calls
@@ -1472,6 +1475,75 @@
   }
   (reply idx:integer))
 
+(init-fn split  ; string-address, delimiter character -> string-address-array-address (one new string per piece)
+  (default-scope:scope-address <- new scope:literal 30:literal)
+  (s:string-address <- next-input)
+  (delim:character <- next-input)  ; todo: unicode chars
+  ; empty string? return empty array (zero pieces, not one empty piece)
+  (len:integer <- length s:string-address/deref)
+  { begin
+    (empty?:boolean <- equal len:integer 0:literal)
+    (break-unless empty?:boolean)
+    (result:string-address-array-address <- new string-address-array:literal 0:literal)
+    (reply result:string-address-array-address)
+  }
+  ; count #pieces we need room for
+  (count:integer <- copy 1:literal)  ; n delimiters = n+1 pieces
+  (idx:integer <- copy 0:literal)
+  { begin
+    (idx:integer <- find-next s:string-address delim:character idx:integer)
+    (done?:boolean <- greater-or-equal idx:integer len:integer)  ; a result at or past len means no delimiter is left
+    (break-if done?:boolean)
+    (idx:integer <- add idx:integer 1:literal)  ; resume the search just past this delimiter
+    (count:integer <- add count:integer 1:literal)
+    (loop)
+  }
+  ; allocate space
+;?   (print-primitive (("alloc: " literal)))
+;?   (print-primitive count:integer)
+;?   (print-primitive (("\n" literal)))
+  (result:string-address-array-address <- new string-address-array:literal count:integer)
+  ; repeatedly copy slices (start..end) until delimiter into result[curr-result]
+  (curr-result:integer <- copy 0:literal)
+  (start:integer <- copy 0:literal)
+  { begin
+    ; while pieces remain; stopping at start>=len instead would leave the piece after a trailing delimiter unallocated
+    (done?:boolean <- greater-or-equal curr-result:integer count:integer)
+    (break-if done?:boolean)
+    (end:integer <- find-next s:string-address delim:character start:integer)  ; NOTE(review): assumes find-next yields len when start = len, as the counting loop above already relies on -- confirm
+;?     (print-primitive (("i: " literal)))
+;?     (print-primitive start:integer)
+;?     (print-primitive (("-" literal)))
+;?     (print-primitive end:integer)
+;?     (print-primitive ((" => " literal)))
+;?     (print-primitive curr-result:integer)
+;?     (print-primitive (("\n" literal)))
+    ; compute length of slice (0 for an empty piece)
+    (slice-len:integer <- subtract end:integer start:integer)
+    ; allocate result[curr-result]
+    (dest:string-address-address <- index-address result:string-address-array-address/deref curr-result:integer)
+    (dest:string-address-address/deref <- new string:literal slice-len:integer)
+    ; copy start..end into result[curr-result]
+    (src-idx:integer <- copy start:integer)
+    (dest-idx:integer <- copy 0:literal)
+    { begin
+      (end-copy?:boolean <- greater-or-equal src-idx:integer end:integer)
+      (break-if end-copy?:boolean)
+      (src:character <- index s:string-address/deref src-idx:integer)
+      (tmp:character-address <- index-address dest:string-address-address/deref/deref dest-idx:integer)
+      (tmp:character-address/deref <- copy src:character)
+      (src-idx:integer <- add src-idx:integer 1:literal)
+      (dest-idx:integer <- add dest-idx:integer 1:literal)
+      (loop)
+    }
+    ; slide over to next slice, skipping the delimiter at 'end'
+    (start:integer <- add end:integer 1:literal)
+    (curr-result:integer <- add curr-result:integer 1:literal)
+    (loop)
+  }
+  (reply result:string-address-array-address)
+)
+
 )  ; section 100 for system software
 
 ;; load all provided files and start at 'main'
diff --git a/mu.arc.t b/mu.arc.t
index 2e284b7d..2788e9c0 100644
--- a/mu.arc.t
+++ b/mu.arc.t
@@ -3409,6 +3409,92 @@
 (if (~is memory*.2 4)
   (prn "F - 'find-next' finds second of multiple options"))
 
+(reset)
+(new-trace "string-split")
+(add-code '((function main [
+              (1:string-address <- new "a/b")
+              (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
+             ])))
+;? (set dump-trace*)
+(run 'main)
+(each routine completed-routines*
+  (whenlet err ((rep routine) 'error) (prn "error - " err)))
+; location 2 holds the array address main got back from 'split'
+(let arr (memory* 2)
+;?   (prn arr " " (memory* arr))
+  (unless (and (is (memory* arr) 2)  ; one delimiter: expect two pieces
+;?                (do1 t prn.111)
+               (memory-contains-array (memory* (+ arr 1)) "a")
+               (memory-contains-array (memory* (+ arr 2)) "b"))
+    (prn "F - 'split' cuts string at delimiter")))
+
+(reset)
+(new-trace "string-split2")
+(add-code '((function main [
+              (1:string-address <- new "a/b/c")
+              (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
+             ])))
+;? (set dump-trace*)
+(run 'main)
+(each routine completed-routines*
+  (whenlet err ((rep routine) 'error) (prn "error - " err)))
+; location 2 holds the array address main got back from 'split'
+; two delimiters: expect three pieces
+(let arr (memory* 2)
+;?   (prn arr " " (memory* arr))
+  (unless (and (is (memory* arr) 3)
+;?                (do1 t prn.111)
+               (memory-contains-array (memory* (+ arr 1)) "a")
+               (memory-contains-array (memory* (+ arr 2)) "b")
+               (memory-contains-array (memory* (+ arr 3)) "c"))
+    (prn "F - 'split' cuts string at two delimiters")))
+
+(reset)
+(new-trace "string-split-missing")
+(add-code '((function main [
+              (1:string-address <- new "abc")
+              (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
+             ])))
+(run 'main)
+(each routine completed-routines*
+  (whenlet err ((rep routine) 'error) (prn "error - " err)))
+(let arr (memory* 2)  ; array address that main stored in location 2
+  (unless (and (is (memory* arr) 1)  ; no delimiter present: expect a single piece
+               (memory-contains-array (memory* (+ arr 1)) "abc"))
+    (prn "F - 'split' handles missing delimiter")))
+
+(reset)
+(new-trace "string-split-empty")
+(add-code '((function main [
+              (1:string-address <- new "")
+              (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
+             ])))
+;? (= dump-trace* (obj whitelist '("run")))
+(run 'main)
+(each routine completed-routines*
+  (whenlet err ((rep routine) 'error) (prn "error - " err)))
+(let arr (memory* 2)  ; array address that main stored in location 2
+;?   (prn arr " " (memory* arr))
+  (unless (is (memory* arr) 0)  ; empty input: expect a zero-length array
+    (prn "F - 'split' handles empty string")))
+
+(reset)  ; NOTE(review): presumably clears state left by the previous test -- confirm
+(new-trace "string-split-empty-piece")
+(add-code '((function main [
+              (1:string-address <- new "a/b//c")
+              (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
+             ])))
+(run 'main)
+(each routine completed-routines*
+  (aif rep.routine!error (prn "error - " it)))  ; report any error recorded on a completed routine
+(let base memory*.2  ; location 2 holds the array address main got back from 'split'
+  (if (or (~is memory*.base 4)  ; three delimiters: expect four pieces
+          (~memory-contains-array (memory* (+ base 1)) "a")
+          (~memory-contains-array (memory* (+ base 2)) "b")
+          (~memory-contains-array (memory* (+ base 3)) "")  ; adjacent delimiters yield an empty piece
+          (~memory-contains-array (memory* (+ base 4)) "c"))
+    (prn "F - 'split' preserves empty piece between adjacent delimiters")))  ; message was a copy of the two-delimiter test's
+
 )  ; section 100 for string utilities
 
 (reset)