From 630e93635c1fe2f0c2bde7823ddeb49fb48f6872 Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Fri, 10 Oct 2014 15:04:14 -0700 Subject: 123 - experiment: build the reading flow around the *test* file --- edit.arc.t | 25 +++ mu.arc | 101 +++++------ mu.arc.t | 584 ++++++++++++++++++++++++++++++++++++++++++++----------------- sys.arc | 58 ------ sys.arc.t | 23 --- 5 files changed, 486 insertions(+), 305 deletions(-) delete mode 100644 sys.arc delete mode 100644 sys.arc.t diff --git a/edit.arc.t b/edit.arc.t index 6272699b..3986cbfc 100644 --- a/edit.arc.t +++ b/edit.arc.t @@ -1,5 +1,30 @@ (load "mu.arc") +(on-init + (= types* (obj + ; Each type must be scalar or array, sum or product or primitive + location (obj size 1) + integer (obj size 1) + boolean (obj size 1) + boolean-address (obj size 1 address t) + byte (obj size 1) +;? string (obj array t elem 'byte) ; inspired by Go + character (obj size 1) ; int32 like a Go rune + character-address (obj size 1 address t elem 'character) + string (obj size 1) ; temporary hack + ; arrays consist of an integer length followed by the right number of elems + integer-array (obj array t elem 'integer) + integer-address (obj size 1 address t elem 'integer) ; pointer to int + ; records consist of a series of elems, corresponding to a list of types + integer-boolean-pair (obj size 2 record t elems '(integer boolean)) + ; editor + line (obj array t elem 'character) + line-address (obj size 1 address t elem 'line) + line-address-address (obj size 1 address t elem 'line-address) + screen (obj array t elem 'line-address) + screen-address (obj size 1 address t elem 'screen) + ))) + (reset) (new-trace "new-screen") ;? (set dump-trace*) diff --git a/mu.arc b/mu.arc index c2eebc47..9859be1c 100644 --- a/mu.arc +++ b/mu.arc @@ -1,5 +1,4 @@ -; things that a future assembler will need separate memory for: -; code; types; args channel +; what happens when our virtual machine starts up (= initialization-fns* (queue)) (def reset () (each f (as cons initialization-fns*) @@ -9,6 +8,19 @@ `(enq (fn () ,@body) initialization-fns*)) +(mac init-fn (name . body) + `(enq (fn () (= (function* ',name) ',body)) + initialization-fns*)) + +; things that a future assembler will need separate memory for: +; code; types; args channel +(def clear () + (= types* (table)) + (= memory* (table)) + (= function* (table))) +(enq clear initialization-fns*) + +; persisting and checking traces for each test (= traces* (queue)) (= trace-dir* ".traces/") (ensure-dir trace-dir*) @@ -60,53 +72,11 @@ (each (expected-label expected-msg) expected-contents (prn " ! " expected-label ": " expected-msg))) -(mac init-fn (name . body) - `(enq (fn () (= (function* ',name) ',body)) - initialization-fns*)) - -(def clear () - (= types* (obj - ; must be scalar or array, sum or product or primitive - type (obj size 1) - type-array (obj array t elem 'type) - type-array-address (obj size 1 address t elem 'type-array) - typeinfo (obj size 5 record t elems '(integer boolean boolean boolean type-array)) - typeinfo-address (obj size 1 address t elem 'typeinfo) - typeinfo-address-array (obj array t elem 'typeinfo-address) - location (obj size 1) - integer (obj size 1) - boolean (obj size 1) - boolean-address (obj size 1 address t) - byte (obj size 1) -;? string (obj array t elem 'byte) ; inspired by Go - character (obj size 1) ; int32 like a Go rune - character-address (obj size 1 address t elem 'character) - string (obj size 1) ; temporary hack - ; arrays consist of an integer length followed by the right number of elems - integer-array (obj array t elem 'integer) - integer-address (obj size 1 address t elem 'integer) ; pointer to int - ; records consist of a series of elems, corresponding to a list of types - integer-boolean-pair (obj size 2 record t elems '(integer boolean)) - integer-boolean-pair-address (obj size 1 address t elem 'integer-boolean-pair) - integer-boolean-pair-array (obj array t elem 'integer-boolean-pair) - integer-integer-pair (obj size 2 record t elems '(integer integer)) - integer-point-pair (obj size 2 record t elems '(integer integer-integer-pair)) - custodian (obj size 1 record t elems '(integer)) - ; editor - line (obj array t elem 'character) - line-address (obj size 1 address t elem 'line) - line-address-address (obj size 1 address t elem 'line-address) - screen (obj array t elem 'line-address) - screen-address (obj size 1 address t elem 'screen) - )) - (= memory* (table)) - (= function* (table))) -(enq clear initialization-fns*) - (def add-fns (fns) (each (name . body) fns (= function*.name (convert-braces body)))) +; running mu (def v (operand) ; for value operand.0) @@ -269,6 +239,7 @@ ;? (prn op " " arg " -> " oarg) (let tmp (case op + ; arithmetic add (do (trace "add" (m arg.0) " " (m arg.1)) (+ (m arg.0) (m arg.1)) @@ -282,12 +253,16 @@ idiv (list (trunc:/ (m arg.0) (m arg.1)) (mod (m arg.0) (m arg.1))) + + ; boolean and (and (m arg.0) (m arg.1)) or (or (m arg.0) (m arg.1)) not (not (m arg.0)) + + ; comparison eq (is (m arg.0) (m arg.1)) neq @@ -302,18 +277,8 @@ (<= (m arg.0) (m arg.1)) ge (>= (m arg.0) (m arg.1)) - arg - (let idx (if arg - arg.0 - (do1 caller-arg-idx.context - (++ caller-arg-idx.context))) -;? (prn idx) -;? (prn caller-args.context) - (m caller-args.context.idx)) - type - (ty (caller-args.context arg.0)) - otype - (ty (caller-oargs.context arg.0)) + + ; control flow jmp (do (= pc.context (+ 1 pc.context (v arg.0))) ;? (prn "jumping to " pc.context) @@ -323,6 +288,8 @@ (= pc.context (+ 1 pc.context (v arg.1))) ;? (prn "jumping to " pc.context) (continue)) + + ; data management: scalars, arrays, records copy (m arg.0) get @@ -373,7 +340,6 @@ (if typeinfo.base!array (array-ref-addr base idx) (assert nil "get-address on invalid type @arg.0 => @base"))) - new (let type (v arg.0) (if types*.type!array @@ -413,6 +379,19 @@ console-off (do1 nil (if ($.current-charterm) ($.close-charterm))) + ; user-defined functions + arg + (let idx (if arg + arg.0 + (do1 caller-arg-idx.context + (++ caller-arg-idx.context))) +;? (prn idx) +;? (prn caller-args.context) + (m caller-args.context.idx)) + type + (ty (caller-args.context arg.0)) + otype + (ty (caller-oargs.context arg.0)) reply (do (pop-stack context) (if empty.context (return ninstrs)) @@ -425,13 +404,14 @@ (if empty.context (return ninstrs)) (++ pc.context)) (continue)) - ; else user-defined function + ; else try to call as a user-defined function (do (if function*.op (push-stack context op) (err "no such op @op")) (continue)) ) ; opcode generated some value, stored in 'tmp' + ; copy to output args ;? (prn "store: " tmp " " oarg) (if (acons tmp) (for i 0 (< i (min len.tmp len.oarg)) ++.i @@ -463,6 +443,8 @@ (each elem types*.type!elems (yield sizeof.elem)))))) +; desugar structured assembly based on blocks + (def convert-braces (instrs) (let locs () ; list of information on each brace: (open/close pc) (let pc 0 @@ -552,6 +534,7 @@ (pr msg) (apply prn args)) +; after loading all files, start at 'main' (reset) (awhen cdr.argv (map add-fns:readfile it) diff --git a/mu.arc.t b/mu.arc.t index 7f9e1042..7f7f8a14 100644 --- a/mu.arc.t +++ b/mu.arc.t @@ -1,5 +1,168 @@ +; Mu: An exploration on making the global structure of programs more accessible. +; +; "Is it a language, or an operating system, or a virtual machine? Mu." +; (with apologies to Robert Pirsig: http://en.wikipedia.org/wiki/Mu_%28negative%29#In_popular_culture) +; +; I want to live in a world where I can have an itch to tweak a program, clone +; its open-source repository, orient myself on how it's organized, and make +; the simple change I envisioned, all in an afternoon. This codebase tries to +; make this possible for its readers. (More details: http://akkartik.name/about) +; +; What helps comprehend the global structure of programs? For starters, let's +; enumerate what doesn't: idiomatic code, adherence to a style guide or naming +; convention, consistent indentation, API documentation for each class, etc. +; These conventional considerations improve matters in the small, but don't +; help understand global organization. They help existing programmers manage +; day-to-day operations, but they can't turn outsider programmers into +; insiders. (Elaboration: http://akkartik.name/post/readable-bad) +; +; In my experience, two things have improved matters so far: version control +; and automated tests. Version control lets me rewind back to earlier, simpler +; times when the codebase was simpler, when its core skeleton was easier to +; ascertain. Indeed, arguably what came first is by definition the skeleton of +; a program, modulo major rewrites. Once you understand the skeleton, it +; becomes tractable to 'play back' later major features one by one. (Previous +; project that fleshed out this idea: http://akkartik.name/post/wart-layers) +; +; The second and biggest boost to comprehension comes from tests. Tests are +; good for writers for well-understood reasons: they avoid regressions, and +; they can influence code to be more decoupled and easier to change. In +; addition, tests are also good for the outsider reader because they permit +; active reading. If you can't build a program and run its tests it can't help +; you understand it. It hangs limp at best, and might even be actively +; misleading. If you can run its tests, however, it comes alive. You can step +; through scenarios in a debugger. You can add logging and scan logs to make +; sense of them. You can run what-if scenarios: "why is this line not written +; like this?" Make a change, rerun tests: "Oh, that's why." (Elaboration: +; http://akkartik.name/post/literate-programming) +; +; However, tests are only useful to the extent that they exist. Think back to +; your most recent codebase. Do you feel comfortable releasing a new version +; just because the tests pass? I'm not aware of any such project. There's just +; too many situations envisaged by the authors that were never encoded in a +; test. Even disciplined authors can't test for performance or race conditions +; or fault tolerance. If a line is phrased just so because of some subtle +; performance consideration, it's hard to communicate to newcomers. +; +; This isn't an arcane problem, and it isn't just a matter of altruism. As +; more and more such implicit considerations proliferate, and as the original +; authors are replaced by latecomers for day-to-day operations, knowledge is +; actively forgotten and lost. The once-pristine codebase turns into legacy +; code that is hard to modify without expensive and stress-inducing +; regressions. +; +; How to write tests for performance, fault tolerance, race conditions, etc.? +; How can we state and verify that a codepath doesn't ever perform memory +; allocation, or write to disk? It requires better, more observable primitives +; than we currently have. Modern operating systems have their roots in the +; 70s. Their interfaces were not designed to be testable. They provide no way +; to simulate a full disk, or a specific sequence of writes from different +; threads. We need something better. +; +; This project tries to move, groping, towards that 'something better', a +; platform that is both thoroughly tested and allows programs written for it +; to be thoroughly tested. It tries to answer the question: +; +; If Denis Ritchie and Ken Thompson were to set out today to co-design unix +; and C, knowing what we know about automated tests, what would they do +; differently? +; +; To try to impose *some* constraints on this gigantic yak-shave, we'll try to +; keep both language and OS as simple as possible, focused entirely on +; permitting more kinds of tests, on first *collecting* all the information +; about implicit considerations in some form so that readers and tools can +; have at least some hope of making sense of it. +; +; The initial language will be just assembly. We'll try to make it convenient +; to program in with some simple localized rewrite rules inspired by lisp +; macros and literate programming. Programmers will have to do their own +; memory management and register allocation, but we'll provide libraries to +; help with them. +; +; The initial OS will provide just memory management and concurrency +; primitives. No users or permissions (we don't live on mainframes anymore), +; no kernel- vs user-mode, no virtual memory or process abstraction, all +; threads sharing a single address space (use VMs for security and +; sandboxing). The only use case we care about is getting a test harness to +; run some code, feed it data through blocking channels, stop it and observe +; its internals. The code under test is expected to cooperate in such testing, +; by logging important events for the test harness to observe. (More info: +; http://akkartik.name/post/tracing-tests) +; +; The common thread here is elimination of abstractions, and it's not an +; accident Abstractions help insiders manage the evolution of a codebase, but +; they actively hinder outsiders in understanding it from scratch. This +; matters, because the funnel to turn outsiders into insiders is critical to +; the long-term life of a codebase. Perhaps authors should raise their +; estimation of the costs of abstraction, and go against their instincts for +; introducing it. That's what I'll be trying to do: question every abstraction +; before I introduce it. We'll see how it goes. + +; --- + +; Mu is currently built atop Racket and Arc, but this is temporary and +; contingent. We want to keep our options open, whether to port to a different +; host language, and easy to rewrite to native code for any platform. So we'll +; try to avoid cheating by using host functionality 'for free'. +; +; Other than that, we'll say no more about the code, and focus in the rest of +; this file on the scenarios the code cares about. + (load "mu.arc") +; Every test below is conceptually a run right after our virtual machine +; starts up. When it starts up we assume it knows about the following types. + +(on-init + (= types* (obj + ; Each type must be scalar or array, sum or product or primitive + type (obj size 1) ; implicitly scalar and primitive + type-array (obj array t elem 'type) + type-array-address (obj size 1 address t elem 'type-array) + location (obj size 1) + integer (obj size 1) + boolean (obj size 1) + boolean-address (obj size 1 address t) + byte (obj size 1) +;? string (obj array t elem 'byte) ; inspired by Go + character (obj size 1) ; int32 like a Go rune + character-address (obj size 1 address t elem 'character) + string (obj size 1) ; temporary hack + ; arrays consist of an integer length followed by the right number of elems + integer-array (obj array t elem 'integer) + integer-address (obj size 1 address t elem 'integer) ; pointer to int + ; records consist of a series of elems, corresponding to a list of types + integer-boolean-pair (obj size 2 record t elems '(integer boolean)) + integer-boolean-pair-address (obj size 1 address t elem 'integer-boolean-pair) + integer-boolean-pair-array (obj array t elem 'integer-boolean-pair) + integer-integer-pair (obj size 2 record t elems '(integer integer)) + integer-point-pair (obj size 2 record t elems '(integer integer-integer-pair)) + ))) + +; Our language is assembly-like in that functions consist of series of +; statements, and statements consist of an operation and its arguments (input +; and output). +; +; oarg1, oarg2, ... <- op arg1, arg2, ... +; +; Args must be atomic, like an integer or a memory address, they can't be +; expressions doing arithmetic or function calls. But we can have any number +; of them. +; +; Since we're building on lisp, our code samples won't look quite like the +; idealized syntax above. For now they will be lists of lists: +; +; (function-name +; ((oarg1 oarg2 ... <- op arg1 arg2 ...) +; ... +; ...)) +; +; Each arg/oarg is itself a list, with the payload value at the head, and +; various metadata in the rest. In this first example the only metadata is types: +; 'integer' for a memory location containing an integer, and 'literal' for a +; value included directly in code. (Assembly languages traditionally call them +; 'immediate' operands.) + (reset) (new-trace "literal") (add-fns @@ -11,6 +174,9 @@ (prn "F - 'copy' writes its lone 'arg' after the instruction name to its lone 'oarg' or output arg before the arrow. After this test, the value 23 is stored in memory address 1.")) ;? (quit) +; Our basic arithmetic ops can operate on memory locations or literals. +; (Ignore hardware details like registers for now.) + (reset) (new-trace "add") (add-fns @@ -22,167 +188,6 @@ (if (~iso memory* (obj 1 1 2 3 3 4)) (prn "F - 'add' operates on two addresses")) -(reset) -(new-trace "new-fn") -(add-fns - '((test1 - ((3 integer) <- add (1 integer) (2 integer))) - (main - ((1 integer) <- copy (1 literal)) - ((2 integer) <- copy (3 literal)) - (test1)))) -(run 'main) -;? (prn memory*) -(if (~iso memory* (obj 1 1 2 3 3 4)) - (prn "F - calling a user-defined function runs its instructions")) -;? (quit) - -(reset) -(new-trace "new-fn-once") -(add-fns - '((test1 - ((1 integer) <- copy (1 literal))) - (main - (test1)))) -(if (~is 2 (run 'main)) - (prn "F - calling a user-defined function runs its instructions exactly once")) -;? (quit) - -(reset) -(new-trace "new-fn-reply") -(add-fns - '((test1 - ((3 integer) <- add (1 integer) (2 integer)) - (reply) - ((4 integer) <- copy (34 literal))) - (main - ((1 integer) <- copy (1 literal)) - ((2 integer) <- copy (3 literal)) - (test1)))) -(run 'main) -;? (prn memory*) -(if (~iso memory* (obj 1 1 2 3 3 4)) - (prn "F - 'reply' stops executing the current function")) -;? (quit) - -(reset) -(new-trace "new-fn-reply-nested") -(add-fns - `((test1 - ((3 integer) <- test2)) - (test2 - (reply (2 integer))) - (main - ((2 integer) <- copy (34 literal)) - (test1)))) -(run 'main) -;? (prn memory*) -(if (~iso memory* (obj 2 34 3 34)) - (prn "F - 'reply' stops executing any callers as necessary")) -;? (quit) - -(reset) -(new-trace "new-fn-reply-once") -(add-fns - '((test1 - ((3 integer) <- add (1 integer) (2 integer)) - (reply) - ((4 integer) <- copy (34 literal))) - (main - ((1 integer) <- copy (1 literal)) - ((2 integer) <- copy (3 literal)) - (test1)))) -(if (~is 4 (run 'main)) ; last reply sometimes not counted. worth fixing? - (prn "F - 'reply' executes instructions exactly once")) -;? (quit) - -(reset) -(new-trace "new-fn-arg-sequential") -(add-fns - '((test1 - ((4 integer) <- arg) - ((5 integer) <- arg) - ((3 integer) <- add (4 integer) (5 integer)) - (reply) - ((4 integer) <- copy (34 literal))) - (main - ((1 integer) <- copy (1 literal)) - ((2 integer) <- copy (3 literal)) - (test1 (1 integer) (2 integer)) - ))) -(run 'main) -;? (prn memory*) -(if (~iso memory* (obj 1 1 2 3 3 4 - ; add-fn's temporaries - 4 1 5 3)) - (prn "F - 'arg' accesses in order the operands of the most recent function call (the caller)")) -;? (quit) - -(reset) -(new-trace "new-fn-arg-random-access") -(add-fns - '((test1 - ((5 integer) <- arg 1) - ((4 integer) <- arg 0) - ((3 integer) <- add (4 integer) (5 integer)) - (reply) - ((4 integer) <- copy (34 literal))) ; should never run - (main - ((1 integer) <- copy (1 literal)) - ((2 integer) <- copy (3 literal)) - (test1 (1 integer) (2 integer)) - ))) -(run 'main) -;? (prn memory*) -(if (~iso memory* (obj 1 1 2 3 3 4 - ; add-fn's temporaries - 4 1 5 3)) - (prn "F - 'arg' with index can access function call arguments out of order")) -;? (quit) - -; todo: test that too few args throws an error -; how should errors be handled? will be unclear until we support concurrency and routine trees. - -(reset) -(new-trace "new-fn-reply-oarg") -(add-fns - '((test1 - ((4 integer) <- arg) - ((5 integer) <- arg) - ((6 integer) <- add (4 integer) (5 integer)) - (reply (6 integer)) - ((4 integer) <- copy (34 literal))) - (main - ((1 integer) <- copy (1 literal)) - ((2 integer) <- copy (3 literal)) - ((3 integer) <- test1 (1 integer) (2 integer))))) -(run 'main) -;? (prn memory*) -(if (~iso memory* (obj 1 1 2 3 3 4 - ; add-fn's temporaries - 4 1 5 3 6 4)) - (prn "F - 'reply' can take aguments that are returned, or written back into output args of caller")) - -(reset) -(new-trace "new-fn-reply-oarg-multiple") -(add-fns - '((test1 - ((4 integer) <- arg) - ((5 integer) <- arg) - ((6 integer) <- add (4 integer) (5 integer)) - (reply (6 integer) (5 integer)) - ((4 integer) <- copy (34 literal))) - (main - ((1 integer) <- copy (1 literal)) - ((2 integer) <- copy (3 literal)) - ((3 integer) (7 integer) <- test1 (1 integer) (2 integer))))) -(run 'main) -;? (prn memory*) -(if (~iso memory* (obj 1 1 2 3 3 4 7 3 - ; add-fn's temporaries - 4 1 5 3 6 4)) - (prn "F - 'reply' permits a function to return multiple values at once")) - (reset) (new-trace "add-literal") (add-fns @@ -200,7 +205,7 @@ (run 'main) ;? (prn memory*) (if (~is memory*.1 -2) - (prn "F - 'sub' subtracts the value at one address from the value at another")) + (prn "F - 'sub' subtracts the second arg from the first")) (reset) (new-trace "mul-literal") @@ -220,7 +225,7 @@ (run 'main) ;? (prn memory*) (if (~is memory*.1 (/ real.8 3)) - (prn "F - 'div' divides like 'add' adds")) + (prn "F - 'div' divides like 'sub' subtracts")) (reset) (new-trace "idiv-literal") @@ -232,6 +237,10 @@ (if (~iso memory* (obj 1 2 2 2)) (prn "F - 'idiv' performs integer division, returning quotient and remainder")) +; Basic boolean operations: and, or, not +; There are easy ways to encode booleans in binary, but we'll skip past those +; details. + (reset) (new-trace "and-literal") (add-fns @@ -242,6 +251,8 @@ (if (~is memory*.1 nil) (prn "F - logical 'and' for booleans")) +; Basic comparison operations: lt, le, gt, ge, eq, neq + (reset) (new-trace "lt-literal") (add-fns @@ -282,6 +293,10 @@ (if (~is memory*.1 t) (prn "F - le is the <= inequality operator - 2")) +; Control flow operations: jmp, jif +; These introduce a new type -- 'offset' -- for literals that refer to memory +; locations relative to the current location. + (reset) (new-trace "jmp-skip") (add-fns @@ -357,6 +372,10 @@ (if (~iso memory* (obj 1 2 2 4 3 nil 4 3)) (prn "F - 'jif' can take a negative offset to make backward jumps")) +; Data movement relies on addressing modes: +; 'direct' - refers to a memory location; default for most types. +; 'literal' - directly encoded in the code; implicit for some types like 'offset'. + (reset) (new-trace "direct-addressing") (add-fns @@ -368,6 +387,11 @@ (if (~iso memory* (obj 1 34 2 34)) (prn "F - 'copy' performs direct addressing")) +; 'Indirect' addressing refers to an address stored in a memory location. +; Indicated by the metadata 'deref'. Usually requires an address type. +; In the test below, the memory location 1 contains '2', so an indirect read +; of location 1 returns the value of location 2. + (reset) (new-trace "indirect-addressing") (add-fns @@ -380,6 +404,9 @@ (if (~iso memory* (obj 1 2 2 34 3 34)) (prn "F - 'copy' performs indirect addressing")) +; Output args can use indirect addressing. In the test below the value is +; stored at the location stored in location 1 (i.e. location 2). + (reset) (new-trace "indirect-addressing-oarg") (add-fns @@ -392,6 +419,12 @@ (if (~iso memory* (obj 1 2 2 36)) (prn "F - instructions can perform indirect addressing on output arg")) +; Until now we've dealt with scalar types like integers and booleans and +; addresses. We can also have compound types: arrays and records. +; +; 'get' accesses fields in records +; 'index' accesses indices in arrays + (reset) (new-trace "get-record") (add-fns @@ -504,6 +537,10 @@ (if (~iso memory* (obj 1 2 2 23 3 nil 4 24 5 t 6 1 7 4)) (prn "F - 'index-address' returns addresses of indices of arrays")) +; todo: test that out-of-bounds access throws an error + +; Array values know their length. Record lengths are saved in the types table. + (reset) (new-trace "len-array") (add-fns @@ -519,6 +556,8 @@ (if (~iso memory* (obj 1 2 2 23 3 nil 4 24 5 t 6 2)) (prn "F - 'len' accesses length of array")) +; 'sizeof' is a helper to determine the amount of memory required by a type. + (reset) (new-trace "sizeof-record") (add-fns @@ -539,7 +578,7 @@ (if (~is memory*.1 3) (prn "F - 'sizeof' is different from number of elems")) -; todo: test that out-of-bounds access throws an error +; Regardless of a type's length, you can move it around with 'copy'. (reset) (new-trace "compound-operand") @@ -554,6 +593,186 @@ (if (~iso memory* (obj 1 34 2 nil 3 34 4 nil)) (prn "F - ops can operate on records spanning multiple locations")) +; Just like the table of types is centralized, functions are conceptualized as +; a centralized table of operations just like the 'primitives' we've seen so +; far. If you create a function you can call it like any other op. + +(reset) +(new-trace "new-fn") +(add-fns + '((test1 + ((3 integer) <- add (1 integer) (2 integer))) + (main + ((1 integer) <- copy (1 literal)) + ((2 integer) <- copy (3 literal)) + (test1)))) +(run 'main) +;? (prn memory*) +(if (~iso memory* (obj 1 1 2 3 3 4)) + (prn "F - calling a user-defined function runs its instructions")) +;? (quit) + +(reset) +(new-trace "new-fn-once") +(add-fns + '((test1 + ((1 integer) <- copy (1 literal))) + (main + (test1)))) +(if (~is 2 (run 'main)) + (prn "F - calling a user-defined function runs its instructions exactly once")) +;? (quit) + +; User-defined functions communicate with their callers through two +; primitives: +; +; 'arg' - to access inputs +; 'reply' - to return outputs + +(reset) +(new-trace "new-fn-reply") +(add-fns + '((test1 + ((3 integer) <- add (1 integer) (2 integer)) + (reply) + ((4 integer) <- copy (34 literal))) + (main + ((1 integer) <- copy (1 literal)) + ((2 integer) <- copy (3 literal)) + (test1)))) +(run 'main) +;? (prn memory*) +(if (~iso memory* (obj 1 1 2 3 3 4)) + (prn "F - 'reply' stops executing the current function")) +;? (quit) + +(reset) +(new-trace "new-fn-reply-nested") +(add-fns + `((test1 + ((3 integer) <- test2)) + (test2 + (reply (2 integer))) + (main + ((2 integer) <- copy (34 literal)) + (test1)))) +(run 'main) +;? (prn memory*) +(if (~iso memory* (obj 2 34 3 34)) + (prn "F - 'reply' stops executing any callers as necessary")) +;? (quit) + +(reset) +(new-trace "new-fn-reply-once") +(add-fns + '((test1 + ((3 integer) <- add (1 integer) (2 integer)) + (reply) + ((4 integer) <- copy (34 literal))) + (main + ((1 integer) <- copy (1 literal)) + ((2 integer) <- copy (3 literal)) + (test1)))) +(if (~is 4 (run 'main)) ; last reply sometimes not counted. worth fixing? + (prn "F - 'reply' executes instructions exactly once")) +;? (quit) + +(reset) +(new-trace "new-fn-arg-sequential") +(add-fns + '((test1 + ((4 integer) <- arg) + ((5 integer) <- arg) + ((3 integer) <- add (4 integer) (5 integer)) + (reply) + ((4 integer) <- copy (34 literal))) + (main + ((1 integer) <- copy (1 literal)) + ((2 integer) <- copy (3 literal)) + (test1 (1 integer) (2 integer)) + ))) +(run 'main) +;? (prn memory*) +(if (~iso memory* (obj 1 1 2 3 3 4 + ; add-fn's temporaries + 4 1 5 3)) + (prn "F - 'arg' accesses in order the operands of the most recent function call (the caller)")) +;? (quit) + +(reset) +(new-trace "new-fn-arg-random-access") +(add-fns + '((test1 + ((5 integer) <- arg 1) + ((4 integer) <- arg 0) + ((3 integer) <- add (4 integer) (5 integer)) + (reply) + ((4 integer) <- copy (34 literal))) ; should never run + (main + ((1 integer) <- copy (1 literal)) + ((2 integer) <- copy (3 literal)) + (test1 (1 integer) (2 integer)) + ))) +(run 'main) +;? (prn memory*) +(if (~iso memory* (obj 1 1 2 3 3 4 + ; add-fn's temporaries + 4 1 5 3)) + (prn "F - 'arg' with index can access function call arguments out of order")) +;? (quit) + +; todo: test that too few args throws an error +; how should errors be handled? will be unclear until we support concurrency and routine trees. + +(reset) +(new-trace "new-fn-reply-oarg") +(add-fns + '((test1 + ((4 integer) <- arg) + ((5 integer) <- arg) + ((6 integer) <- add (4 integer) (5 integer)) + (reply (6 integer)) + ((4 integer) <- copy (34 literal))) + (main + ((1 integer) <- copy (1 literal)) + ((2 integer) <- copy (3 literal)) + ((3 integer) <- test1 (1 integer) (2 integer))))) +(run 'main) +;? (prn memory*) +(if (~iso memory* (obj 1 1 2 3 3 4 + ; add-fn's temporaries + 4 1 5 3 6 4)) + (prn "F - 'reply' can take aguments that are returned, or written back into output args of caller")) + +(reset) +(new-trace "new-fn-reply-oarg-multiple") +(add-fns + '((test1 + ((4 integer) <- arg) + ((5 integer) <- arg) + ((6 integer) <- add (4 integer) (5 integer)) + (reply (6 integer) (5 integer)) + ((4 integer) <- copy (34 literal))) + (main + ((1 integer) <- copy (1 literal)) + ((2 integer) <- copy (3 literal)) + ((3 integer) (7 integer) <- test1 (1 integer) (2 integer))))) +(run 'main) +;? (prn memory*) +(if (~iso memory* (obj 1 1 2 3 3 4 7 3 + ; add-fn's temporaries + 4 1 5 3 6 4)) + (prn "F - 'reply' permits a function to return multiple values at once")) + +; 'type' and 'otype' let us create generic functions that run different code +; based on what args the caller provides, or what oargs the caller expects. +; +; These operations are more experimental than their surroundings; we might +; eventually need more detailed access to the calling instruction. +; +; There's also the open question of how to deal with dynamic-typing situations +; where the caller doesn't know the type of its arg/oarg. + (reset) (new-trace "dispatch-otype") (add-fns @@ -638,6 +857,25 @@ 4 'integer 5 nil 6 3 7 4 8 7)) (prn "F - different calls can exercise different clauses of the same function (internals)")) +; Our control operators are quite inconvenient to use, so mu provides a +; lightweight tool called 'convert-braces' to work in a slightly more +; convenient format with nested braces: +; +; { +; some instructions +; { +; more instructions +; } +; } +; +; Braces are just labels, they require no special parsing. The operations +; 'break' and 'continue' jump to just after the enclosing '}' and '{' +; respectively. +; +; Conditional and unconditional 'break' and 'continue' should give us 80% of +; the benefits of the control-flow primitives we're used to in other +; languages, like 'if', 'while', 'for', etc. + (reset) (new-trace "convert-braces") (if (~iso (convert-braces '(((1 integer) <- copy (4 literal)) @@ -779,6 +1017,8 @@ (if (~iso memory* (obj 1 4 2 4 3 nil 4 34)) (prn "F - continue might never trigger")) +; A rudimentary memory allocator. Eventually we want to write this in mu. + (reset) (new-trace "new-primitive") (let before Memory-in-use-until @@ -819,6 +1059,13 @@ (if (~iso Memory-in-use-until (+ before 6)) (prn "F - 'new' on primitive arrays increments high-water mark by their (variable) size"))) +; A rudimentary process scheduler. You can 'run' multiple functions at once, +; and they share the virtual processor. +; There's also a 'fork' primitive to let functions create new threads of +; execution. +; Eventually we want to allow callers to influence how much of their CPU they +; give to their 'children', or to rescind a child's running privileges. + (reset) (new-trace "scheduler") (add-fns @@ -842,4 +1089,11 @@ ("run" "f2 0") )) -(reset) +; The scheduler needs to keep track of the call stack for each thread. +; Eventually we'll want to save this information in mu's address space itself, +; along with the types array, the magic buffers for args and oargs, and so on. +; +; Eventually we want the right stack-management primitives to build delimited +; continuations in mu. + +(reset) ; end file with this to persist the trace for the final test diff --git a/sys.arc b/sys.arc deleted file mode 100644 index b5f9c436..00000000 --- a/sys.arc +++ /dev/null @@ -1,58 +0,0 @@ -(load "mu.arc") - -; memory map: 0-2 for convenience constants -(enq (fn () - (run `(((0 integer) <- literal 0) - ((1 integer) <- literal 1) - ((2 integer) <- literal 2)))) - initialization-fns*) - -(enq (fn () - (build-type-table) - initialization-fns*) - -(= Free 3) -(= Type-array Free) -(def build-type-table () - (allocate-type-array) - (build-types) - (fill-in-type-array)) - -(def allocate-type-array () - (= memory*.Free len.types*) - (++ Free) - (++ Free len.types*)) - -(def build-types () - (each type types* ; todo - ( - -(def sizeof (typeinfo) - (if (~or typeinfo!record typeinfo!array) - typeinfo!size - typeinfo!record - (sum idfn - (accum yield - (each elem typeinfo!elems - (yield (sizeof type*.elem))))) - typeinfo!array - (* (sizeof (type* typeinfo!elem)) - ( - - -;; 'new' - simple slab allocator. Intended only to carve out isolated memory -;; for different threads/routines as they request. -; memory map: 3 for root custodian (size 1) -(= Root_custodian 3) -(= Allocator_start 1000) ; lower locations reserved - -(enq (fn () - (run `(((,Root_custodian location) <- literal ,Allocator_start)))) - initialization-fns*) - -; (type-addr val) <- new (custodian x), (type t) -; memory map: 4-5 locals for slab allocator -(init-fn new - ((4 integer-address) <- copy (3 location)) - ((3 location) <- add (3 location) (1 integer)) - (reply (4 integer-address))) diff --git a/sys.arc.t b/sys.arc.t deleted file mode 100644 index 3a5d0acf..00000000 --- a/sys.arc.t +++ /dev/null @@ -1,23 +0,0 @@ -(load "new.arc") - -(reset) -;? (prn memory*) -(if (~iso memory*.Root_custodian Allocator_start) - (prn "F - allocator initialized")) - -(reset) -(add-fns - '((main - ((x integer-address) <- new) - ((x integer-address deref) <- literal 34)))) -(run function*!main) -;? (prn memory*) -(if (~iso memory*.Root_custodian (+ Allocator_start 1)) - (prn "F - 'new' increments allocator pointer")) -(if (~iso memory*.Allocator_start 34) - (prn "F - 'new' returns old location")) - -; other tests to express: -; no other function can increment the pointer -; no later clause can increment the pointer after this base clause -; multiple threads/routines can't call the allocator at once -- cgit 1.4.1-2-gfad0