https://github.com/akkartik/mu/blob/main/linux/411string.mu
  1 # read up to 'len' code-point-utf8s after skipping the first 'start' ones
  2 fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle array byte) {
  3   var in-stream: (stream byte 0x100)
  4   var in-stream-addr/esi: (addr stream byte) <- address in-stream
  5   write in-stream-addr, in
  6   var out-stream: (stream byte 0x100)
  7   var out-stream-addr/edi: (addr stream byte) <- address out-stream
  8   $substring:core: {
  9     # skip 'start' code-point-utf8s
 10     var i/eax: int <- copy 0
 11     {
 12       compare i, start
 13       break-if->=
 14       {
 15         var dummy/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr
 16         compare dummy, 0xffffffff/end-of-file
 17         break-if-= $substring:core
 18       }
 19       i <- increment
 20       loop
 21     }
 22     # copy 'len' code-point-utf8s
 23     i <- copy 0
 24     {
 25       compare i, len
 26       break-if->=
 27       {
 28         var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr
 29         compare g, 0xffffffff/end-of-file
 30         break-if-= $substring:core
 31         write-code-point-utf8 out-stream-addr, g
 32       }
 33       i <- increment
 34       loop
 35     }
 36   }
 37   stream-to-array out-stream-addr, out-ah
 38 }
 39 
 40 fn test-substring {
 41   var out-h: (handle array byte)
 42   var out-ah/edi: (addr handle array byte) <- address out-h
 43   # prefix substrings
 44   substring 0, 0, 3, out-ah
 45   var out/eax: (addr array byte) <- lookup *out-ah
 46   check-strings-equal out, "", "F - test-substring/null"
 47   substring "", 0, 3, out-ah
 48   var out/eax: (addr array byte) <- lookup *out-ah
 49 #?   print-string-to-real-screen out
 50 #?   print-string-to-real-screen "\n"
 51   check-strings-equal out, "", "F - test-substring/empty"
 52   #
 53   substring "abcde", 0, 3, out-ah
 54   var out/eax: (addr array byte) <- lookup *out-ah
 55 #?   print-string-to-real-screen out
 56 #?   print-string-to-real-screen "\n"
 57   check-strings-equal out, "abc", "F - test-substring/truncate"
 58   #
 59   substring "abcde", 0, 5, out-ah
 60   var out/eax: (addr array byte) <- lookup *out-ah
 61   check-strings-equal out, "abcde", "F - test-substring/all"
 62   #
 63   substring "abcde", 0, 7, out-ah
 64   var out/eax: (addr array byte) <- lookup *out-ah
 65   check-strings-equal out, "abcde", "F - test-substring/too-small"
 66   # substrings outside string
 67   substring "abcde", 6, 1, out-ah
 68   var out/eax: (addr array byte) <- lookup *out-ah
 69   check-strings-equal out, "", "F - test-substring/start-too-large"
 70   # trim prefix
 71   substring "", 2, 3, out-ah
 72   var out/eax: (addr array byte) <- lookup *out-ah
 73   check-strings-equal out, "", "F - test-substring/middle-empty"
 74   #
 75   substring "abcde", 1, 2, out-ah
 76   var out/eax: (addr array byte) <- lookup *out-ah
 77   check-strings-equal out, "bc", "F - test-substring/middle-truncate"
 78   #
 79   substring "abcde", 1, 4, out-ah
 80   var out/eax: (addr array byte) <- lookup *out-ah
 81   check-strings-equal out, "bcde", "F - test-substring/middle-all"
 82   #
 83   substring "abcde", 1, 5, out-ah
 84   var out/eax: (addr array byte) <- lookup *out-ah
 85   check-strings-equal out, "bcde", "F - test-substring/middle-too-small"
 86 }
 87 
 88 fn split-string in: (addr array byte), delim: code-point-utf8, out: (addr handle array (handle array byte)) {
 89   var in-stream: (stream byte 0x100)
 90   var in-stream-addr/esi: (addr stream byte) <- address in-stream
 91   write in-stream-addr, in
 92   var tokens-stream: (stream (handle array byte) 0x100)
 93   var tokens-stream-addr/edi: (addr stream (handle array byte)) <- address tokens-stream
 94   var curr-stream: (stream byte 0x100)
 95   var curr-stream-addr/ecx: (addr stream byte) <- address curr-stream
 96   $split-string:core: {
 97     var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr
 98     compare g, 0xffffffff
 99     break-if-=
100 #?     print-code-point-utf8-to-real-screen g
101 #?     print-string-to-real-screen "\n"
102     compare g, delim
103     {
104       break-if-!=
105       # token complete; flush
106       var token: (handle array byte)
107       var token-ah/eax: (addr handle array byte) <- address token
108       stream-to-array curr-stream-addr, token-ah
109       write-to-stream tokens-stream-addr, token-ah
110       clear-stream curr-stream-addr
111       loop $split-string:core
112     }
113     write-code-point-utf8 curr-stream-addr, g
114     loop
115   }
116   stream-to-array tokens-stream-addr, out
117 }
118 
# Smoke test: split a short string on a delimiter it contains.
# The resulting tokens are not inspected.
fn test-split-string {
  var tokens-h: (handle array (handle array byte))
  var tokens-ah/edi: (addr handle array (handle array byte)) <- address tokens-h
  # 0x61 is 'a'
  split-string "bab", 0x61, tokens-ah
  # getting here without aborting is the only check
}
125 }