about summary refs log tree commit diff stats
path: root/linux/411string.mu
diff options
context:
space:
mode:
author Kartik K. Agaram <vc@akkartik.com> 2021-03-03 22:09:50 -0800
committer Kartik K. Agaram <vc@akkartik.com> 2021-03-03 22:21:03 -0800
commit 71e4f3812982dba2efb471283d310224e8db363e (patch)
tree ea111a1acb8b8845dbda39c0e1b4bac1d198143b /linux/411string.mu
parent c6b928be29ac8cdb4e4d6e1eaa20420ff03e5a4c (diff)
download mu-71e4f3812982dba2efb471283d310224e8db363e.tar.gz
7842 - new directory organization
Baremetal is now the default build target and therefore has its sources
at the top-level. Baremetal programs build using the phase-2 Mu toolchain
that requires a Linux kernel. This phase-2 codebase, which used to be at
the top-level, is now under the linux/ directory. Finally, the phase-2 toolchain,
while self-hosting, has a way to bootstrap from a C implementation, which
is now stored in linux/bootstrap. The bootstrap C implementation uses some
literate programming tools that are now in linux/bootstrap/tools.

So the whole thing has gotten inverted. Each directory should build one
artifact and include the main sources (along with standard library). Tools
used for building it are relegated to sub-directories, even though those
tools are often useful in their own right, and have had lots of interesting
programs written using them.

A couple of things have gotten dropped in this process:
  - I had old ways to run on just a Linux kernel, or with a Soso kernel.
    No more.
  - I had some old tooling for running a single test at the cursor. I haven't
    used that lately. Maybe I'll bring it back one day.

The reorg isn't done yet. Still to do:
  - redo documentation everywhere. All the README files, all other markdown,
    particularly vocabulary.md.
  - clean up how-to-run comments at the start of programs everywhere
  - rethink what to do with the html/ directory. Do we even want to keep
    supporting it?

In spite of these shortcomings, all the scripts at the top-level, linux/
and linux/bootstrap are working. The names of the scripts also feel reasonable.
This is a good milestone to take stock at.
Diffstat (limited to 'linux/411string.mu')
-rw-r--r-- linux/411string.mu 125
1 files changed, 125 insertions, 0 deletions
diff --git a/linux/411string.mu b/linux/411string.mu
new file mode 100644
index 00000000..cf0471ac
--- /dev/null
+++ b/linux/411string.mu
@@ -0,0 +1,125 @@
+# Save a slice of 'in' to 'out-ah': drop the first 'start' graphemes, then keep
+# at most the next 'len' graphemes.
+# If 'in' runs out during either phase, the slice is whatever was kept so far
+# (possibly nothing).
+fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle array byte) {
+  # stage the input in a stream so it can be consumed one grapheme at a time
+  var src-storage: (stream byte 0x100)
+  var src/esi: (addr stream byte) <- address src-storage
+  write src, in
+  # graphemes selected for the slice accumulate here
+  var dest-storage: (stream byte 0x100)
+  var dest/edi: (addr stream byte) <- address dest-storage
+  $substring:scan: {
+    # phase 1: discard 'start' graphemes
+    var count/eax: int <- copy 0
+    {
+      compare count, start
+      break-if->=
+      {
+        # 'skipped' shares eax with 'count', so it is confined to its own block
+        var skipped/eax: grapheme <- read-grapheme src
+        compare skipped, 0xffffffff/end-of-file
+        # input ended before the slice began; nothing to keep
+        break-if-= $substring:scan
+      }
+      count <- increment
+      loop
+    }
+    # phase 2: keep up to 'len' graphemes
+    count <- copy 0
+    {
+      compare count, len
+      break-if->=
+      {
+        # 'next' shares eax with 'count', so it is confined to its own block
+        var next/eax: grapheme <- read-grapheme src
+        compare next, 0xffffffff/end-of-file
+        # input ended early; settle for what has been kept
+        break-if-= $substring:scan
+        write-grapheme dest, next
+      }
+      count <- increment
+      loop
+    }
+  }
+  # every exit from the scan lands here
+  stream-to-array dest, out-ah
+}
+
+# Exercise 'substring' on null, empty and 5-byte inputs, with slices that start
+# at the beginning or after it and that fit in, exactly cover, or overrun the input.
+fn test-substring {
+  var result-h: (handle array byte)
+  var result-ah/edi: (addr handle array byte) <- address result-h
+  # slices starting at the first grapheme
+  # null input
+  substring 0, 0, 3, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+  check-strings-equal result, "", "F - test-substring/null"
+  # empty input
+  substring "", 0, 3, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+#?   print-string-to-real-screen result
+#?   print-string-to-real-screen "\n"
+  check-strings-equal result, "", "F - test-substring/empty"
+  # 'len' shorter than the input
+  substring "abcde", 0, 3, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+#?   print-string-to-real-screen result
+#?   print-string-to-real-screen "\n"
+  check-strings-equal result, "abc", "F - test-substring/truncate"
+  # 'len' exactly the size of the input
+  substring "abcde", 0, 5, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+  check-strings-equal result, "abcde", "F - test-substring/all"
+  # 'len' larger than the input
+  substring "abcde", 0, 7, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+  check-strings-equal result, "abcde", "F - test-substring/too-small"
+  # 'start' beyond the end of the input
+  substring "abcde", 6, 1, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+  check-strings-equal result, "", "F - test-substring/start-too-large"
+  # slices starting after the first grapheme
+  # empty input
+  substring "", 2, 3, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+  check-strings-equal result, "", "F - test-substring/middle-empty"
+  # slice ends before the input does
+  substring "abcde", 1, 2, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+  check-strings-equal result, "bc", "F - test-substring/middle-truncate"
+  # slice ends exactly where the input does
+  substring "abcde", 1, 4, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+  check-strings-equal result, "bcde", "F - test-substring/middle-all"
+  # slice would run past the end of the input
+  substring "abcde", 1, 5, result-ah
+  var result/eax: (addr array byte) <- lookup *result-ah
+  check-strings-equal result, "bcde", "F - test-substring/middle-too-small"
+}
+
+# Split 'in' at every occurrence of 'delim' and save the tokens, in order, as an
+# array of strings in 'out'.
+# Delimiters themselves are not part of any token. The text after the last
+# delimiter (or the whole input, if it contains no delimiter) becomes the final
+# token, so the result holds one more token than 'in' has delimiters.
+fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array (handle array byte)) {
+  # stage the input in a stream so it can be consumed one grapheme at a time
+  var in-stream: (stream byte 0x100)
+  var in-stream-addr/esi: (addr stream byte) <- address in-stream
+  write in-stream-addr, in
+  # completed tokens, in order
+  var tokens-stream: (stream (handle array byte) 0x100)
+  var tokens-stream-addr/edi: (addr stream (handle array byte)) <- address tokens-stream
+  # graphemes of the token currently being assembled
+  var curr-stream: (stream byte 0x100)
+  var curr-stream-addr/ecx: (addr stream byte) <- address curr-stream
+  $split-string:core: {
+    var g/eax: grapheme <- read-grapheme in-stream-addr
+    compare g, 0xffffffff/end-of-file
+    break-if-=
+#?     print-grapheme-to-real-screen g
+#?     print-string-to-real-screen "\n"
+    compare g, delim
+    {
+      break-if-!=
+      # token complete; flush
+      var token: (handle array byte)
+      var token-ah/eax: (addr handle array byte) <- address token
+      stream-to-array curr-stream-addr, token-ah
+      write-to-stream tokens-stream-addr, token-ah
+      clear-stream curr-stream-addr
+      loop $split-string:core
+    }
+    # not a delimiter; extend the current token
+    write-grapheme curr-stream-addr, g
+    loop
+  }
+  # input exhausted: flush whatever follows the last delimiter as the final token.
+  # Without this, the tail of the input would be silently dropped.
+  var last-token: (handle array byte)
+  var last-token-ah/eax: (addr handle array byte) <- address last-token
+  stream-to-array curr-stream-addr, last-token-ah
+  write-to-stream tokens-stream-addr, last-token-ah
+  stream-to-array tokens-stream-addr, out
+}
+
+# Smoke test for 'split-string': there are no assertions on the tokens produced.
+fn test-split-string {
+  var tokens-h: (handle array (handle array byte))
+  var tokens-ah/edi: (addr handle array (handle array byte)) <- address tokens-h
+  # split "bab" at 0x61 (ASCII 'a')
+  split-string "bab", 0x61, tokens-ah
+  # reaching this point without a crash is the whole test
+}