From 630e93635c1fe2f0c2bde7823ddeb49fb48f6872 Mon Sep 17 00:00:00 2001
From: "Kartik K. Agaram" <vc@akkartik.com>
Date: Fri, 10 Oct 2014 15:04:14 -0700
Subject: 123 - experiment: build the reading flow around the *test* file

---
 edit.arc.t |  25 +++
 mu.arc     | 101 +++++------
 mu.arc.t   | 584 ++++++++++++++++++++++++++++++++++++++++++++-----------------
 sys.arc    |  58 ------
 sys.arc.t  |  23 ---
 5 files changed, 486 insertions(+), 305 deletions(-)
 delete mode 100644 sys.arc
 delete mode 100644 sys.arc.t

diff --git a/edit.arc.t b/edit.arc.t
index 6272699b..3986cbfc 100644
--- a/edit.arc.t
+++ b/edit.arc.t
@@ -1,5 +1,30 @@
 (load "mu.arc")
 
+(on-init
+  (= types* (obj
+              ; Each type must be scalar or array, sum or product or primitive
+              location (obj size 1)
+              integer (obj size 1)
+              boolean (obj size 1)
+              boolean-address (obj size 1  address t)
+              byte (obj size 1)
+;?               string (obj array t  elem 'byte)  ; inspired by Go
+              character (obj size 1)  ; int32 like a Go rune
+              character-address (obj size 1  address t  elem 'character)
+              string (obj size 1)  ; temporary hack
+              ; arrays consist of an integer length followed by the right number of elems
+              integer-array (obj array t  elem 'integer)
+              integer-address (obj size 1  address t  elem 'integer)  ; pointer to int
+              ; records consist of a series of elems, corresponding to a list of types
+              integer-boolean-pair (obj size 2  record t  elems '(integer boolean))
+              ; editor
+              line (obj array t  elem 'character)
+              line-address (obj size 1  address t  elem 'line)
+              line-address-address (obj size 1  address t  elem 'line-address)
+              screen (obj array t  elem 'line-address)
+              screen-address (obj size 1  address t  elem 'screen)
+              )))
+
 (reset)
 (new-trace "new-screen")
 ;? (set dump-trace*)
diff --git a/mu.arc b/mu.arc
index c2eebc47..9859be1c 100644
--- a/mu.arc
+++ b/mu.arc
@@ -1,5 +1,4 @@
-; things that a future assembler will need separate memory for:
-;   code; types; args channel
+; what happens when our virtual machine starts up
 (= initialization-fns* (queue))
 (def reset ()
   (each f (as cons initialization-fns*)
@@ -9,6 +8,19 @@
   `(enq (fn () ,@body)
         initialization-fns*))
 
+(mac init-fn (name . body)
+  `(enq (fn () (= (function* ',name) ',body))
+        initialization-fns*))
+
+; things that a future assembler will need separate memory for:
+;   code; types; args channel
+(def clear ()
+  (= types* (table))
+  (= memory* (table))
+  (= function* (table)))
+(enq clear initialization-fns*)
+
+; persisting and checking traces for each test
 (= traces* (queue))
 (= trace-dir* ".traces/")
 (ensure-dir trace-dir*)
@@ -60,53 +72,11 @@
   (each (expected-label expected-msg)  expected-contents
     (prn "  ! " expected-label ": " expected-msg)))
 
-(mac init-fn (name . body)
-  `(enq (fn () (= (function* ',name) ',body))
-        initialization-fns*))
-
-(def clear ()
-  (= types* (obj
-              ; must be scalar or array, sum or product or primitive
-              type (obj size 1)
-              type-array (obj array t  elem 'type)
-              type-array-address (obj size 1  address t  elem 'type-array)
-              typeinfo (obj size 5  record t  elems '(integer boolean boolean boolean type-array))
-              typeinfo-address (obj size 1  address t  elem 'typeinfo)
-              typeinfo-address-array (obj array t  elem 'typeinfo-address)
-              location (obj size 1)
-              integer (obj size 1)
-              boolean (obj size 1)
-              boolean-address (obj size 1  address t)
-              byte (obj size 1)
-;?               string (obj array t  elem 'byte)  ; inspired by Go
-              character (obj size 1)  ; int32 like a Go rune
-              character-address (obj size 1  address t  elem 'character)
-              string (obj size 1)  ; temporary hack
-              ; arrays consist of an integer length followed by the right number of elems
-              integer-array (obj array t  elem 'integer)
-              integer-address (obj size 1  address t  elem 'integer)  ; pointer to int
-              ; records consist of a series of elems, corresponding to a list of types
-              integer-boolean-pair (obj size 2  record t  elems '(integer boolean))
-              integer-boolean-pair-address (obj size 1  address t  elem 'integer-boolean-pair)
-              integer-boolean-pair-array (obj array t  elem 'integer-boolean-pair)
-              integer-integer-pair (obj size 2  record t  elems '(integer integer))
-              integer-point-pair (obj size 2  record t  elems '(integer integer-integer-pair))
-              custodian  (obj size 1  record t  elems '(integer))
-              ; editor
-              line (obj array t  elem 'character)
-              line-address (obj size 1  address t  elem 'line)
-              line-address-address (obj size 1  address t  elem 'line-address)
-              screen (obj array t  elem 'line-address)
-              screen-address (obj size 1  address t  elem 'screen)
-              ))
-  (= memory* (table))
-  (= function* (table)))
-(enq clear initialization-fns*)
-
 (def add-fns (fns)
   (each (name . body) fns
     (= function*.name (convert-braces body))))
 
+; running mu
 (def v (operand)  ; for value
   operand.0)
 
@@ -269,6 +239,7 @@
 ;?         (prn op " " arg " -> " oarg)
         (let tmp
               (case op
+                ; arithmetic
                 add
                   (do (trace "add" (m arg.0) " " (m arg.1))
                   (+ (m arg.0) (m arg.1))
@@ -282,12 +253,16 @@
                 idiv
                   (list (trunc:/ (m arg.0) (m arg.1))
                         (mod (m arg.0) (m arg.1)))
+
+                ; boolean
                 and
                   (and (m arg.0) (m arg.1))
                 or
                   (or (m arg.0) (m arg.1))
                 not
                   (not (m arg.0))
+
+                ; comparison
                 eq
                   (is (m arg.0) (m arg.1))
                 neq
@@ -302,18 +277,8 @@
                   (<= (m arg.0) (m arg.1))
                 ge
                   (>= (m arg.0) (m arg.1))
-                arg
-                  (let idx (if arg
-                             arg.0
-                             (do1 caller-arg-idx.context
-                                (++ caller-arg-idx.context)))
-;?                     (prn idx)
-;?                     (prn caller-args.context)
-                    (m caller-args.context.idx))
-                type
-                  (ty (caller-args.context arg.0))
-                otype
-                  (ty (caller-oargs.context arg.0))
+
+                ; control flow
                 jmp
                   (do (= pc.context (+ 1 pc.context (v arg.0)))
 ;?                       (prn "jumping to " pc.context)
@@ -323,6 +288,8 @@
                     (= pc.context (+ 1 pc.context (v arg.1)))
 ;?                     (prn "jumping to " pc.context)
                     (continue))
+
+                ; data management: scalars, arrays, records
                 copy
                   (m arg.0)
                 get
@@ -373,7 +340,6 @@
                     (if typeinfo.base!array
                       (array-ref-addr base idx)
                       (assert nil "get-address on invalid type @arg.0 => @base")))
-
                 new
                   (let type (v arg.0)
                     (if types*.type!array
@@ -413,6 +379,19 @@
                 console-off
                   (do1 nil (if ($.current-charterm) ($.close-charterm)))
 
+                ; user-defined functions
+                arg
+                  (let idx (if arg
+                             arg.0
+                             (do1 caller-arg-idx.context
+                                (++ caller-arg-idx.context)))
+;?                     (prn idx)
+;?                     (prn caller-args.context)
+                    (m caller-args.context.idx))
+                type
+                  (ty (caller-args.context arg.0))
+                otype
+                  (ty (caller-oargs.context arg.0))
                 reply
                   (do (pop-stack context)
                       (if empty.context (return ninstrs))
@@ -425,13 +404,14 @@
                         (if empty.context (return ninstrs))
                         (++ pc.context))
                       (continue))
-                ; else user-defined function
+                ; else try to call as a user-defined function
                   (do (if function*.op
                         (push-stack context op)
                         (err "no such op @op"))
                       (continue))
                 )
               ; opcode generated some value, stored in 'tmp'
+              ; copy to output args
 ;?               (prn "store: " tmp " " oarg)
               (if (acons tmp)
                 (for i 0 (< i (min len.tmp len.oarg)) ++.i
@@ -463,6 +443,8 @@
             (each elem types*.type!elems
               (yield sizeof.elem))))))
 
+; desugar structured assembly based on blocks
+
 (def convert-braces (instrs)
   (let locs ()  ; list of information on each brace: (open/close pc)
     (let pc 0
@@ -552,6 +534,7 @@
   (pr msg)
   (apply prn args))
 
+; after loading all files, start at 'main'
 (reset)
 (awhen cdr.argv
   (map add-fns:readfile it)
diff --git a/mu.arc.t b/mu.arc.t
index 7f9e1042..7f7f8a14 100644
--- a/mu.arc.t
+++ b/mu.arc.t
@@ -1,5 +1,168 @@
+; Mu: An exploration on making the global structure of programs more accessible.
+;
+;   "Is it a language, or an operating system, or a virtual machine? Mu."
+;   (with apologies to Robert Pirsig: http://en.wikipedia.org/wiki/Mu_%28negative%29#In_popular_culture)
+;
+; I want to live in a world where I can have an itch to tweak a program, clone
+; its open-source repository, orient myself on how it's organized, and make
+; the simple change I envisioned, all in an afternoon. This codebase tries to
+; make this possible for its readers. (More details: http://akkartik.name/about)
+;
+; What helps comprehend the global structure of programs? For starters, let's
+; enumerate what doesn't: idiomatic code, adherence to a style guide or naming
+; convention, consistent indentation, API documentation for each class, etc.
+; These conventional considerations improve matters in the small, but don't
+; help understand global organization. They help existing programmers manage
+; day-to-day operations, but they can't turn outsider programmers into
+; insiders. (Elaboration: http://akkartik.name/post/readable-bad)
+;
+; In my experience, two things have improved matters so far: version control
+; and automated tests. Version control lets me rewind back to earlier, simpler
+; times when the codebase was simpler, when its core skeleton was easier to
+; ascertain. Indeed, arguably what came first is by definition the skeleton of
+; a program, modulo major rewrites. Once you understand the skeleton, it
+; becomes tractable to 'play back' later major features one by one. (Previous
+; project that fleshed out this idea: http://akkartik.name/post/wart-layers)
+;
+; The second and biggest boost to comprehension comes from tests. Tests are
+; good for writers for well-understood reasons: they avoid regressions, and
+; they can influence code to be more decoupled and easier to change. In
+; addition, tests are also good for the outsider reader because they permit
+; active reading. If you can't build a program and run its tests it can't help
+; you understand it. It hangs limp at best, and might even be actively
+; misleading. If you can run its tests, however, it comes alive. You can step
+; through scenarios in a debugger. You can add logging and scan logs to make
+; sense of them. You can run what-if scenarios: "why is this line not written
+; like this?" Make a change, rerun tests: "Oh, that's why." (Elaboration:
+; http://akkartik.name/post/literate-programming)
+;
+; However, tests are only useful to the extent that they exist. Think back to
+; your most recent codebase. Do you feel comfortable releasing a new version
+; just because the tests pass? I'm not aware of any such project. There's just
+; too many situations envisaged by the authors that were never encoded in a
+; test. Even disciplined authors can't test for performance or race conditions
+; or fault tolerance. If a line is phrased just so because of some subtle
+; performance consideration, it's hard to communicate to newcomers.
+;
+; This isn't an arcane problem, and it isn't just a matter of altruism. As
+; more and more such implicit considerations proliferate, and as the original
+; authors are replaced by latecomers for day-to-day operations, knowledge is
+; actively forgotten and lost. The once-pristine codebase turns into legacy
+; code that is hard to modify without expensive and stress-inducing
+; regressions.
+;
+; How to write tests for performance, fault tolerance, race conditions, etc.?
+; How can we state and verify that a codepath doesn't ever perform memory
+; allocation, or write to disk? It requires better, more observable primitives
+; than we currently have. Modern operating systems have their roots in the
+; 70s. Their interfaces were not designed to be testable. They provide no way
+; to simulate a full disk, or a specific sequence of writes from different
+; threads. We need something better.
+;
+; This project tries to move, groping, towards that 'something better', a
+; platform that is both thoroughly tested and allows programs written for it
+; to be thoroughly tested. It tries to answer the question:
+;
+;   If Denis Ritchie and Ken Thompson were to set out today to co-design unix
+;   and C, knowing what we know about automated tests, what would they do
+;   differently?
+;
+; To try to impose *some* constraints on this gigantic yak-shave, we'll try to
+; keep both language and OS as simple as possible, focused entirely on
+; permitting more kinds of tests, on first *collecting* all the information
+; about implicit considerations in some form so that readers and tools can
+; have at least some hope of making sense of it.
+;
+; The initial language will be just assembly. We'll try to make it convenient
+; to program in with some simple localized rewrite rules inspired by lisp
+; macros and literate programming. Programmers will have to do their own
+; memory management and register allocation, but we'll provide libraries to
+; help with them.
+;
+; The initial OS will provide just memory management and concurrency
+; primitives. No users or permissions (we don't live on mainframes anymore),
+; no kernel- vs user-mode, no virtual memory or process abstraction, all
+; threads sharing a single address space (use VMs for security and
+; sandboxing). The only use case we care about is getting a test harness to
+; run some code, feed it data through blocking channels, stop it and observe
+; its internals. The code under test is expected to cooperate in such testing,
+; by logging important events for the test harness to observe. (More info:
+; http://akkartik.name/post/tracing-tests)
+;
+; The common thread here is elimination of abstractions, and it's not an
+; accident Abstractions help insiders manage the evolution of a codebase, but
+; they actively hinder outsiders in understanding it from scratch. This
+; matters, because the funnel to turn outsiders into insiders is critical to
+; the long-term life of a codebase. Perhaps authors should raise their
+; estimation of the costs of abstraction, and go against their instincts for
+; introducing it. That's what I'll be trying to do: question every abstraction
+; before I introduce it. We'll see how it goes.
+
+; ---
+
+; Mu is currently built atop Racket and Arc, but this is temporary and
+; contingent. We want to keep our options open, whether to port to a different
+; host language, and easy to rewrite to native code for any platform. So we'll
+; try to avoid cheating by using host functionality 'for free'.
+;
+; Other than that, we'll say no more about the code, and focus in the rest of
+; this file on the scenarios the code cares about.
+
 (load "mu.arc")
 
+; Every test below is conceptually a run right after our virtual machine
+; starts up. When it starts up we assume it knows about the following types.
+
+(on-init
+  (= types* (obj
+              ; Each type must be scalar or array, sum or product or primitive
+              type (obj size 1)  ; implicitly scalar and primitive
+              type-array (obj array t  elem 'type)
+              type-array-address (obj size 1  address t  elem 'type-array)
+              location (obj size 1)
+              integer (obj size 1)
+              boolean (obj size 1)
+              boolean-address (obj size 1  address t)
+              byte (obj size 1)
+;?               string (obj array t  elem 'byte)  ; inspired by Go
+              character (obj size 1)  ; int32 like a Go rune
+              character-address (obj size 1  address t  elem 'character)
+              string (obj size 1)  ; temporary hack
+              ; arrays consist of an integer length followed by the right number of elems
+              integer-array (obj array t  elem 'integer)
+              integer-address (obj size 1  address t  elem 'integer)  ; pointer to int
+              ; records consist of a series of elems, corresponding to a list of types
+              integer-boolean-pair (obj size 2  record t  elems '(integer boolean))
+              integer-boolean-pair-address (obj size 1  address t  elem 'integer-boolean-pair)
+              integer-boolean-pair-array (obj array t  elem 'integer-boolean-pair)
+              integer-integer-pair (obj size 2  record t  elems '(integer integer))
+              integer-point-pair (obj size 2  record t  elems '(integer integer-integer-pair))
+              )))
+
+; Our language is assembly-like in that functions consist of series of
+; statements, and statements consist of an operation and its arguments (input
+; and output).
+;
+;   oarg1, oarg2, ... <- op arg1, arg2, ...
+;
+; Args must be atomic, like an integer or a memory address, they can't be
+; expressions doing arithmetic or function calls. But we can have any number
+; of them.
+;
+; Since we're building on lisp, our code samples won't look quite like the
+; idealized syntax above. For now they will be lists of lists:
+;
+;   (function-name
+;     ((oarg1 oarg2 ... <- op arg1 arg2 ...)
+;      ...
+;      ...))
+;
+; Each arg/oarg is itself a list, with the payload value at the head, and
+; various metadata in the rest. In this first example the only metadata is types:
+; 'integer' for a memory location containing an integer, and 'literal' for a
+; value included directly in code. (Assembly languages traditionally call them
+; 'immediate' operands.)
+
 (reset)
 (new-trace "literal")
 (add-fns
@@ -11,6 +174,9 @@
   (prn "F - 'copy' writes its lone 'arg' after the instruction name to its lone 'oarg' or output arg before the arrow. After this test, the value 23 is stored in memory address 1."))
 ;? (quit)
 
+; Our basic arithmetic ops can operate on memory locations or literals.
+; (Ignore hardware details like registers for now.)
+
 (reset)
 (new-trace "add")
 (add-fns
@@ -22,167 +188,6 @@
 (if (~iso memory* (obj 1 1  2 3  3 4))
   (prn "F - 'add' operates on two addresses"))
 
-(reset)
-(new-trace "new-fn")
-(add-fns
-  '((test1
-      ((3 integer) <- add (1 integer) (2 integer)))
-    (main
-      ((1 integer) <- copy (1 literal))
-      ((2 integer) <- copy (3 literal))
-      (test1))))
-(run 'main)
-;? (prn memory*)
-(if (~iso memory* (obj 1 1  2 3  3 4))
-  (prn "F - calling a user-defined function runs its instructions"))
-;? (quit)
-
-(reset)
-(new-trace "new-fn-once")
-(add-fns
-  '((test1
-      ((1 integer) <- copy (1 literal)))
-    (main
-      (test1))))
-(if (~is 2 (run 'main))
-  (prn "F - calling a user-defined function runs its instructions exactly once"))
-;? (quit)
-
-(reset)
-(new-trace "new-fn-reply")
-(add-fns
-  '((test1
-      ((3 integer) <- add (1 integer) (2 integer))
-      (reply)
-      ((4 integer) <- copy (34 literal)))
-    (main
-      ((1 integer) <- copy (1 literal))
-      ((2 integer) <- copy (3 literal))
-      (test1))))
-(run 'main)
-;? (prn memory*)
-(if (~iso memory* (obj 1 1  2 3  3 4))
-  (prn "F - 'reply' stops executing the current function"))
-;? (quit)
-
-(reset)
-(new-trace "new-fn-reply-nested")
-(add-fns
-  `((test1
-      ((3 integer) <- test2))
-    (test2
-      (reply (2 integer)))
-    (main
-      ((2 integer) <- copy (34 literal))
-      (test1))))
-(run 'main)
-;? (prn memory*)
-(if (~iso memory* (obj 2 34  3 34))
-  (prn "F - 'reply' stops executing any callers as necessary"))
-;? (quit)
-
-(reset)
-(new-trace "new-fn-reply-once")
-(add-fns
-  '((test1
-      ((3 integer) <- add (1 integer) (2 integer))
-      (reply)
-      ((4 integer) <- copy (34 literal)))
-    (main
-      ((1 integer) <- copy (1 literal))
-      ((2 integer) <- copy (3 literal))
-      (test1))))
-(if (~is 4 (run 'main))  ; last reply sometimes not counted. worth fixing?
-  (prn "F - 'reply' executes instructions exactly once"))
-;? (quit)
-
-(reset)
-(new-trace "new-fn-arg-sequential")
-(add-fns
-  '((test1
-      ((4 integer) <- arg)
-      ((5 integer) <- arg)
-      ((3 integer) <- add (4 integer) (5 integer))
-      (reply)
-      ((4 integer) <- copy (34 literal)))
-    (main
-      ((1 integer) <- copy (1 literal))
-      ((2 integer) <- copy (3 literal))
-      (test1 (1 integer) (2 integer))
-    )))
-(run 'main)
-;? (prn memory*)
-(if (~iso memory* (obj 1 1  2 3  3 4
-                       ; add-fn's temporaries
-                       4 1  5 3))
-  (prn "F - 'arg' accesses in order the operands of the most recent function call (the caller)"))
-;? (quit)
-
-(reset)
-(new-trace "new-fn-arg-random-access")
-(add-fns
-  '((test1
-      ((5 integer) <- arg 1)
-      ((4 integer) <- arg 0)
-      ((3 integer) <- add (4 integer) (5 integer))
-      (reply)
-      ((4 integer) <- copy (34 literal)))  ; should never run
-    (main
-      ((1 integer) <- copy (1 literal))
-      ((2 integer) <- copy (3 literal))
-      (test1 (1 integer) (2 integer))
-    )))
-(run 'main)
-;? (prn memory*)
-(if (~iso memory* (obj 1 1  2 3  3 4
-                       ; add-fn's temporaries
-                       4 1  5 3))
-  (prn "F - 'arg' with index can access function call arguments out of order"))
-;? (quit)
-
-; todo: test that too few args throws an error
-; how should errors be handled? will be unclear until we support concurrency and routine trees.
-
-(reset)
-(new-trace "new-fn-reply-oarg")
-(add-fns
-  '((test1
-      ((4 integer) <- arg)
-      ((5 integer) <- arg)
-      ((6 integer) <- add (4 integer) (5 integer))
-      (reply (6 integer))
-      ((4 integer) <- copy (34 literal)))
-    (main
-      ((1 integer) <- copy (1 literal))
-      ((2 integer) <- copy (3 literal))
-      ((3 integer) <- test1 (1 integer) (2 integer)))))
-(run 'main)
-;? (prn memory*)
-(if (~iso memory* (obj 1 1  2 3  3 4
-                       ; add-fn's temporaries
-                       4 1  5 3  6 4))
-  (prn "F - 'reply' can take aguments that are returned, or written back into output args of caller"))
-
-(reset)
-(new-trace "new-fn-reply-oarg-multiple")
-(add-fns
-  '((test1
-      ((4 integer) <- arg)
-      ((5 integer) <- arg)
-      ((6 integer) <- add (4 integer) (5 integer))
-      (reply (6 integer) (5 integer))
-      ((4 integer) <- copy (34 literal)))
-    (main
-      ((1 integer) <- copy (1 literal))
-      ((2 integer) <- copy (3 literal))
-      ((3 integer) (7 integer) <- test1 (1 integer) (2 integer)))))
-(run 'main)
-;? (prn memory*)
-(if (~iso memory* (obj 1 1  2 3  3 4    7 3
-                         ; add-fn's temporaries
-                         4 1  5 3  6 4))
-  (prn "F - 'reply' permits a function to return multiple values at once"))
-
 (reset)
 (new-trace "add-literal")
 (add-fns
@@ -200,7 +205,7 @@
 (run 'main)
 ;? (prn memory*)
 (if (~is memory*.1 -2)
-  (prn "F - 'sub' subtracts the value at one address from the value at another"))
+  (prn "F - 'sub' subtracts the second arg from the first"))
 
 (reset)
 (new-trace "mul-literal")
@@ -220,7 +225,7 @@
 (run 'main)
 ;? (prn memory*)
 (if (~is memory*.1 (/ real.8 3))
-  (prn "F - 'div' divides like 'add' adds"))
+  (prn "F - 'div' divides like 'sub' subtracts"))
 
 (reset)
 (new-trace "idiv-literal")
@@ -232,6 +237,10 @@
 (if (~iso memory* (obj 1 2  2 2))
   (prn "F - 'idiv' performs integer division, returning quotient and remainder"))
 
+; Basic boolean operations: and, or, not
+; There are easy ways to encode booleans in binary, but we'll skip past those
+; details.
+
 (reset)
 (new-trace "and-literal")
 (add-fns
@@ -242,6 +251,8 @@
 (if (~is memory*.1 nil)
   (prn "F - logical 'and' for booleans"))
 
+; Basic comparison operations: lt, le, gt, ge, eq, neq
+
 (reset)
 (new-trace "lt-literal")
 (add-fns
@@ -282,6 +293,10 @@
 (if (~is memory*.1 t)
   (prn "F - le is the <= inequality operator - 2"))
 
+; Control flow operations: jmp, jif
+; These introduce a new type -- 'offset' -- for literals that refer to memory
+; locations relative to the current location.
+
 (reset)
 (new-trace "jmp-skip")
 (add-fns
@@ -357,6 +372,10 @@
 (if (~iso memory* (obj 1 2  2 4  3 nil  4 3))
   (prn "F - 'jif' can take a negative offset to make backward jumps"))
 
+; Data movement relies on addressing modes:
+;   'direct' - refers to a memory location; default for most types.
+;   'literal' - directly encoded in the code; implicit for some types like 'offset'.
+
 (reset)
 (new-trace "direct-addressing")
 (add-fns
@@ -368,6 +387,11 @@
 (if (~iso memory* (obj 1 34  2 34))
   (prn "F - 'copy' performs direct addressing"))
 
+; 'Indirect' addressing refers to an address stored in a memory location.
+; Indicated by the metadata 'deref'. Usually requires an address type.
+; In the test below, the memory location 1 contains '2', so an indirect read
+; of location 1 returns the value of location 2.
+
 (reset)
 (new-trace "indirect-addressing")
 (add-fns
@@ -380,6 +404,9 @@
 (if (~iso memory* (obj 1 2  2 34  3 34))
   (prn "F - 'copy' performs indirect addressing"))
 
+; Output args can use indirect addressing. In the test below the value is
+; stored at the location stored in location 1 (i.e. location 2).
+
 (reset)
 (new-trace "indirect-addressing-oarg")
 (add-fns
@@ -392,6 +419,12 @@
 (if (~iso memory* (obj 1 2  2 36))
   (prn "F - instructions can perform indirect addressing on output arg"))
 
+; Until now we've dealt with scalar types like integers and booleans and
+; addresses. We can also have compound types: arrays and records.
+;
+; 'get' accesses fields in records
+; 'index' accesses indices in arrays
+
 (reset)
 (new-trace "get-record")
 (add-fns
@@ -504,6 +537,10 @@
 (if (~iso memory* (obj 1 2  2 23 3 nil  4 24 5 t  6 1  7 4))
   (prn "F - 'index-address' returns addresses of indices of arrays"))
 
+; todo: test that out-of-bounds access throws an error
+
+; Array values know their length. Record lengths are saved in the types table.
+
 (reset)
 (new-trace "len-array")
 (add-fns
@@ -519,6 +556,8 @@
 (if (~iso memory* (obj 1 2  2 23 3 nil  4 24 5 t  6 2))
   (prn "F - 'len' accesses length of array"))
 
+; 'sizeof' is a helper to determine the amount of memory required by a type.
+
 (reset)
 (new-trace "sizeof-record")
 (add-fns
@@ -539,7 +578,7 @@
 (if (~is memory*.1 3)
   (prn "F - 'sizeof' is different from number of elems"))
 
-; todo: test that out-of-bounds access throws an error
+; Regardless of a type's length, you can move it around with 'copy'.
 
 (reset)
 (new-trace "compound-operand")
@@ -554,6 +593,186 @@
 (if (~iso memory* (obj 1 34  2 nil  3 34  4 nil))
   (prn "F - ops can operate on records spanning multiple locations"))
 
+; Just like the table of types is centralized, functions are conceptualized as
+; a centralized table of operations just like the 'primitives' we've seen so
+; far. If you create a function you can call it like any other op.
+
+(reset)
+(new-trace "new-fn")
+(add-fns
+  '((test1
+      ((3 integer) <- add (1 integer) (2 integer)))
+    (main
+      ((1 integer) <- copy (1 literal))
+      ((2 integer) <- copy (3 literal))
+      (test1))))
+(run 'main)
+;? (prn memory*)
+(if (~iso memory* (obj 1 1  2 3  3 4))
+  (prn "F - calling a user-defined function runs its instructions"))
+;? (quit)
+
+(reset)
+(new-trace "new-fn-once")
+(add-fns
+  '((test1
+      ((1 integer) <- copy (1 literal)))
+    (main
+      (test1))))
+(if (~is 2 (run 'main))
+  (prn "F - calling a user-defined function runs its instructions exactly once"))
+;? (quit)
+
+; User-defined functions communicate with their callers through two
+; primitives:
+;
+;   'arg' - to access inputs
+;   'reply' - to return outputs
+
+(reset)
+(new-trace "new-fn-reply")
+(add-fns
+  '((test1
+      ((3 integer) <- add (1 integer) (2 integer))
+      (reply)
+      ((4 integer) <- copy (34 literal)))
+    (main
+      ((1 integer) <- copy (1 literal))
+      ((2 integer) <- copy (3 literal))
+      (test1))))
+(run 'main)
+;? (prn memory*)
+(if (~iso memory* (obj 1 1  2 3  3 4))
+  (prn "F - 'reply' stops executing the current function"))
+;? (quit)
+
+(reset)
+(new-trace "new-fn-reply-nested")
+(add-fns
+  `((test1
+      ((3 integer) <- test2))
+    (test2
+      (reply (2 integer)))
+    (main
+      ((2 integer) <- copy (34 literal))
+      (test1))))
+(run 'main)
+;? (prn memory*)
+(if (~iso memory* (obj 2 34  3 34))
+  (prn "F - 'reply' stops executing any callers as necessary"))
+;? (quit)
+
+(reset)
+(new-trace "new-fn-reply-once")
+(add-fns
+  '((test1
+      ((3 integer) <- add (1 integer) (2 integer))
+      (reply)
+      ((4 integer) <- copy (34 literal)))
+    (main
+      ((1 integer) <- copy (1 literal))
+      ((2 integer) <- copy (3 literal))
+      (test1))))
+(if (~is 4 (run 'main))  ; last reply sometimes not counted. worth fixing?
+  (prn "F - 'reply' executes instructions exactly once"))
+;? (quit)
+
+(reset)
+(new-trace "new-fn-arg-sequential")
+(add-fns
+  '((test1
+      ((4 integer) <- arg)
+      ((5 integer) <- arg)
+      ((3 integer) <- add (4 integer) (5 integer))
+      (reply)
+      ((4 integer) <- copy (34 literal)))
+    (main
+      ((1 integer) <- copy (1 literal))
+      ((2 integer) <- copy (3 literal))
+      (test1 (1 integer) (2 integer))
+    )))
+(run 'main)
+;? (prn memory*)
+(if (~iso memory* (obj 1 1  2 3  3 4
+                       ; add-fn's temporaries
+                       4 1  5 3))
+  (prn "F - 'arg' accesses in order the operands of the most recent function call (the caller)"))
+;? (quit)
+
+(reset)
+(new-trace "new-fn-arg-random-access")
+(add-fns
+  '((test1
+      ((5 integer) <- arg 1)
+      ((4 integer) <- arg 0)
+      ((3 integer) <- add (4 integer) (5 integer))
+      (reply)
+      ((4 integer) <- copy (34 literal)))  ; should never run
+    (main
+      ((1 integer) <- copy (1 literal))
+      ((2 integer) <- copy (3 literal))
+      (test1 (1 integer) (2 integer))
+    )))
+(run 'main)
+;? (prn memory*)
+(if (~iso memory* (obj 1 1  2 3  3 4
+                       ; add-fn's temporaries
+                       4 1  5 3))
+  (prn "F - 'arg' with index can access function call arguments out of order"))
+;? (quit)
+
+; todo: test that too few args throws an error
+; how should errors be handled? will be unclear until we support concurrency and routine trees.
+
+(reset)
+(new-trace "new-fn-reply-oarg")
+(add-fns
+  '((test1
+      ((4 integer) <- arg)
+      ((5 integer) <- arg)
+      ((6 integer) <- add (4 integer) (5 integer))
+      (reply (6 integer))
+      ((4 integer) <- copy (34 literal)))
+    (main
+      ((1 integer) <- copy (1 literal))
+      ((2 integer) <- copy (3 literal))
+      ((3 integer) <- test1 (1 integer) (2 integer)))))
+(run 'main)
+;? (prn memory*)
+(if (~iso memory* (obj 1 1  2 3  3 4
+                       ; add-fn's temporaries
+                       4 1  5 3  6 4))
+  (prn "F - 'reply' can take aguments that are returned, or written back into output args of caller"))
+
+(reset)
+(new-trace "new-fn-reply-oarg-multiple")
+(add-fns
+  '((test1
+      ((4 integer) <- arg)
+      ((5 integer) <- arg)
+      ((6 integer) <- add (4 integer) (5 integer))
+      (reply (6 integer) (5 integer))
+      ((4 integer) <- copy (34 literal)))
+    (main
+      ((1 integer) <- copy (1 literal))
+      ((2 integer) <- copy (3 literal))
+      ((3 integer) (7 integer) <- test1 (1 integer) (2 integer)))))
+(run 'main)
+;? (prn memory*)
+(if (~iso memory* (obj 1 1  2 3  3 4    7 3
+                         ; add-fn's temporaries
+                         4 1  5 3  6 4))
+  (prn "F - 'reply' permits a function to return multiple values at once"))
+
+; 'type' and 'otype' let us create generic functions that run different code
+; based on what args the caller provides, or what oargs the caller expects.
+;
+; These operations are more experimental than their surroundings; we might
+; eventually need more detailed access to the calling instruction.
+;
+; There's also the open question of how to deal with dynamic-typing situations
+; where the caller doesn't know the type of its arg/oarg.
+
 (reset)
 (new-trace "dispatch-otype")
 (add-fns
@@ -638,6 +857,25 @@
                        4 'integer  5 nil  6 3  7 4  8 7))
   (prn "F - different calls can exercise different clauses of the same function (internals)"))
 
+; Our control operators are quite inconvenient to use, so mu provides a
+; lightweight tool called 'convert-braces' to work in a slightly more
+; convenient format with nested braces:
+;
+;   {
+;     some instructions
+;     {
+;       more instructions
+;     }
+;   }
+;
+; Braces are just labels, they require no special parsing. The operations
+; 'break' and 'continue' jump to just after the enclosing '}' and '{'
+; respectively.
+;
+; Conditional and unconditional 'break' and 'continue' should give us 80% of
+; the benefits of the control-flow primitives we're used to in other
+; languages, like 'if', 'while', 'for', etc.
+
 (reset)
 (new-trace "convert-braces")
 (if (~iso (convert-braces '(((1 integer) <- copy (4 literal))
@@ -779,6 +1017,8 @@
 (if (~iso memory* (obj 1 4  2 4  3 nil  4 34))
   (prn "F - continue might never trigger"))
 
+; A rudimentary memory allocator. Eventually we want to write this in mu.
+
 (reset)
 (new-trace "new-primitive")
 (let before Memory-in-use-until
@@ -819,6 +1059,13 @@
   (if (~iso Memory-in-use-until (+ before 6))
     (prn "F - 'new' on primitive arrays increments high-water mark by their (variable) size")))
 
+; A rudimentary process scheduler. You can 'run' multiple functions at once,
+; and they share the virtual processor.
+; There's also a 'fork' primitive to let functions create new threads of
+; execution.
+; Eventually we want to allow callers to influence how much of their CPU they
+; give to their 'children', or to rescind a child's running privileges.
+
 (reset)
 (new-trace "scheduler")
 (add-fns
@@ -842,4 +1089,11 @@
     ("run" "f2 0")
   ))
 
-(reset)
+; The scheduler needs to keep track of the call stack for each thread.
+; Eventually we'll want to save this information in mu's address space itself,
+; along with the types array, the magic buffers for args and oargs, and so on.
+;
+; Eventually we want the right stack-management primitives to build delimited
+; continuations in mu.
+
+(reset)  ; end file with this to persist the trace for the final test
diff --git a/sys.arc b/sys.arc
deleted file mode 100644
index b5f9c436..00000000
--- a/sys.arc
+++ /dev/null
@@ -1,58 +0,0 @@
-(load "mu.arc")
-
-; memory map: 0-2 for convenience constants
-(enq (fn ()
-       (run `(((0 integer) <- literal 0)
-              ((1 integer) <- literal 1)
-              ((2 integer) <- literal 2))))
-     initialization-fns*)
-
-(enq (fn ()
-       (build-type-table)
-     initialization-fns*)
-
-(= Free 3)
-(= Type-array Free)
-(def build-type-table ()
-  (allocate-type-array)
-  (build-types)
-  (fill-in-type-array))
-
-(def allocate-type-array ()
-  (= memory*.Free len.types*)
-  (++ Free)
-  (++ Free len.types*))
-
-(def build-types ()
-  (each type types*  ; todo
-    (
-
-(def sizeof (typeinfo)
-  (if (~or typeinfo!record typeinfo!array)
-        typeinfo!size
-      typeinfo!record
-        (sum idfn
-          (accum yield
-            (each elem typeinfo!elems
-              (yield (sizeof type*.elem)))))
-      typeinfo!array
-        (* (sizeof (type* typeinfo!elem))
-           (
-
-
-;; 'new' - simple slab allocator. Intended only to carve out isolated memory
-;; for different threads/routines as they request.
-; memory map: 3 for root custodian (size 1)
-(= Root_custodian 3)
-(= Allocator_start 1000)  ; lower locations reserved
-
-(enq (fn ()
-       (run `(((,Root_custodian location) <- literal ,Allocator_start))))
-     initialization-fns*)
-
-; (type-addr val) <- new (custodian x), (type t)
-; memory map: 4-5 locals for slab allocator
-(init-fn new
-  ((4 integer-address) <- copy (3 location))
-  ((3 location) <- add (3 location) (1 integer))
-  (reply (4 integer-address)))
diff --git a/sys.arc.t b/sys.arc.t
deleted file mode 100644
index 3a5d0acf..00000000
--- a/sys.arc.t
+++ /dev/null
@@ -1,23 +0,0 @@
-(load "new.arc")
-
-(reset)
-;? (prn memory*)
-(if (~iso memory*.Root_custodian Allocator_start)
-  (prn "F - allocator initialized"))
-
-(reset)
-(add-fns
-  '((main
-      ((x integer-address) <- new)
-      ((x integer-address deref) <- literal 34))))
-(run function*!main)
-;? (prn memory*)
-(if (~iso memory*.Root_custodian (+ Allocator_start 1))
-  (prn "F - 'new' increments allocator pointer"))
-(if (~iso memory*.Allocator_start 34)
-  (prn "F - 'new' returns old location"))
-
-; other tests to express:
-;  no other function can increment the pointer
-;  no later clause can increment the pointer after this base clause
-;  multiple threads/routines can't call the allocator at once
-- 
cgit 1.4.1-2-gfad0