diff options
author | Kartik K. Agaram <vc@akkartik.com> | 2017-06-19 21:47:07 -0700 |
---|---|---|
committer | Kartik K. Agaram <vc@akkartik.com> | 2017-06-19 21:47:07 -0700 |
commit | 248e789e7cedf9dfab25657d3dfa195d7ad0127b (patch) | |
tree | 03db54fbd3c86ecf428eba7421b5b23d2c8f902a /subx | |
parent | e11bec57730f5bbcd9d6780c8dec0cbd4153c50f (diff) | |
download | mu-248e789e7cedf9dfab25657d3dfa195d7ad0127b.tar.gz |
3930 - experimental bytecode interpreter
Diffstat (limited to 'subx')
-rw-r--r-- | subx/000organization.cc | 140 | ||||
-rw-r--r-- | subx/001help.cc | 220 | ||||
-rw-r--r-- | subx/002test.cc | 112 | ||||
-rw-r--r-- | subx/003trace.cc | 399 | ||||
-rw-r--r-- | subx/003trace.test.cc | 124 | ||||
-rw-r--r-- | subx/Readme.md | 1 | ||||
-rwxr-xr-x | subx/build | 106 | ||||
-rwxr-xr-x | subx/clean | 6 |
8 files changed, 1108 insertions, 0 deletions
diff --git a/subx/000organization.cc b/subx/000organization.cc new file mode 100644 index 00000000..a29a8813 --- /dev/null +++ b/subx/000organization.cc @@ -0,0 +1,140 @@ +//: You guessed right: the '000' prefix means you should start reading here. +//: +//: This project is set up to load all files with a numeric prefix. Just +//: create a new file and start hacking. +//: +//: The first few files (00*) are independent of what this program does, an +//: experimental skeleton that will hopefully make it both easier for others to +//: understand and more malleable, easier to rewrite and remould into radically +//: different shapes without breaking in subtle corner cases. The premise is +//: that understandability and rewrite-friendliness are related in a virtuous +//: cycle. Doing one well makes it easier to do the other. +//: +//: Lower down, this file contains a legal, bare-bones C++ program. It doesn't +//: do anything yet; subsequent files will contain :(...) directives to insert +//: lines into it. For example: +//: :(after "more events") +//: This directive means: insert the following lines after a line in the +//: program containing the words "more events". +//: +//: A simple tool is included to 'tangle' all the files together in sequence +//: according to their directives into a single source file containing all the +//: code for the project, and then feed the source file to the compiler. +//: (It'll drop these comments starting with a '//:' prefix that only make +//: sense before tangling.) +//: +//: Directives free up the programmer to order code for others to read rather +//: than as forced by the computer or compiler. Each individual feature can be +//: organized in a self-contained 'layer' that adds code to many different data +//: structures and functions all over the program. The right decomposition into +//: layers will let each layer make sense in isolation. +//: +//: "If I look at any small part of it, I can see what is going on -- I don't +//: need to refer to other parts to understand what something is doing. +//: +//: If I look at any large part in overview, I can see what is going on -- I +//: don't need to know all the details to get it. +//: +//: Every level of detail is as locally coherent and as well thought-out as +//: any other level." +//: +//: -- Richard Gabriel, "The Quality Without A Name" +//: (http://dreamsongs.com/Files/PatternsOfSoftware.pdf, page 42) +//: +//: Directives are powerful; they permit inserting or modifying any point in +//: the program. Using them tastefully requires mapping out specific lines as +//: waypoints for future layers to hook into. Often such waypoints will be in +//: comments, capitalized to hint that other layers rely on their presence. +//: +//: A single waypoint might have many different code fragments hooking into +//: it from all over the codebase. Use 'before' directives to insert +//: code at a location in order, top to bottom, and 'after' directives to +//: insert code in reverse order. By convention waypoints intended for insertion +//: before begin with 'End'. Notice below how the layers line up above the "End +//: Foo" waypoint. +//: +//: File 001 File 002 File 003 +//: ============ =================== =================== +//: // Foo +//: ------------ +//: <---- :(before "End Foo") +//: .... +//: ... +//: ------------ +//: <---------------------------- :(before "End Foo") +//: .... +//: ... +//: // End Foo +//: ============ +//: +//: Here's part of a layer in color: http://i.imgur.com/0eONnyX.png. Directives +//: are shaded dark. +//: +//: Layers do more than just shuffle code around. In a well-organized codebase +//: it should be possible to stop loading after any file/layer, build and run +//: the program, and pass all tests for loaded features. (Relevant is +//: http://youtube.com/watch?v=c8N72t7aScY, a scene from "2001: A Space +//: Odyssey".) Get into the habit of running the included script called +//: 'test_layers' before you commit any changes. +//: +//: This 'subsetting guarantee' ensures that this directory contains a +//: cleaned-up narrative of the evolution of this codebase. Organizing +//: autobiographically allows a newcomer to rapidly orient himself, reading the +//: first few files to understand a simple gestalt of a program's core purpose +//: and features, and later gradually working his way through other features as +//: the need arises. +//: +//: Programmers shouldn't need to understand everything about a program to hack +//: on it. But they shouldn't be prevented from a thorough understanding of +//: each aspect either. The goal of layers is to reward curiosity. + +// Includes +// End Includes + +// Types +// End Types + +// Prototypes are auto-generated in the 'build' script; define your functions +// in any order. Just be sure to declare each function header all on one line. +// Our auto-generation scripts are too minimal and simple-minded to handle +// anything else. +#include "function_list" // by convention, files ending with '_list' are auto-generated + +// Globals +// +// All statements in this section should always define a single variable on a +// single line. The 'build' script will simple-mindedly auto-generate extern +// declarations for them. Don't forget to define (not just declare) constants +// with extern linkage in this section, since C++ global constants have +// internal linkage by default. +// +// End Globals + +int main(int argc, char* argv[]) { + atexit(teardown); + + // End One-time Setup + + // Commandline Parsing + // End Commandline Parsing + + return 0; // End Main +} + +// Unit Tests +// End Unit Tests + +//: our first directive; will move the include above the program +:(before "End Includes") +#include <stdlib.h> + +//: Without directives or with the :(code) directive, lines get added at the +//: end. +:(code) +void setup() { + // End Setup +} + +void teardown() { + // End Teardown +} diff --git a/subx/001help.cc b/subx/001help.cc new file mode 100644 index 00000000..d20d8144 --- /dev/null +++ b/subx/001help.cc @@ -0,0 +1,220 @@ +//: Everything this project/binary supports. +//: This should give you a sense for what to look forward to in later layers. + +:(before "End Commandline Parsing") +if (argc != 1) { + //: this is the functionality later layers will provide + // currently no automated tests for commandline arg parsing + return 1; +} + +//: Support for option parsing. +//: Options always begin with '--' and are always the first arguments. An +//: option will never follow a non-option. +:(before "End Commandline Parsing") +char** arg = &argv[1]; +while (argc > 1 && starts_with(*arg, "--")) { + if (false) + ; // no-op branch just so any further additions can consistently always start with 'else' + // End Commandline Options(*arg) + else + cerr << "skipping unknown option " << *arg << '\n'; + --argc; ++argv; ++arg; +} + +//:: Helper function used by the above fragment of code (and later layers too, +//:: who knows?). +//: The :(code) directive appends function definitions to the end of the +//: project. Regardless of where functions are defined, we can call them +//: anywhere we like as long as we format the function header in a specific +//: way: put it all on a single line without indent, end the line with ') {' +//: and no trailing whitespace. As long as functions uniformly start this +//: way, our 'build' script contains a little command to automatically +//: generate declarations for them. +:(code) +bool is_equal(char* s, const char* lit) { + return strncmp(s, lit, strlen(lit)) == 0; +} + +bool starts_with(const string& s, const string& pat) { + string::const_iterator a=s.begin(), b=pat.begin(); + for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) + if (*a != *b) return false; + return b == pat.end(); +} + +//: I'll throw some style conventions here for want of a better place for them. +//: As a rule I hate style guides. Do what you want, that's my motto. But since +//: we're dealing with C/C++, the one big thing we want to avoid is undefined +//: behavior. If a compiler ever encounters undefined behavior it can make +//: your program do anything it wants. +//: +//: For reference, my checklist of undefined behaviors to watch out for: +//: out-of-bounds access +//: uninitialized variables +//: use after free +//: dereferencing invalid pointers: null, a new of size 0, others +//: +//: casting a large number to a type too small to hold it +//: +//: integer overflow +//: division by zero and other undefined expressions +//: left-shift by negative count +//: shifting values by more than or equal to the number of bits they contain +//: bitwise operations on signed numbers +//: +//: Converting pointers to types of different alignment requirements +//: T* -> void* -> T*: defined +//: T* -> U* -> T*: defined if non-function pointers and alignment requirements are same +//: function pointers may be cast to other function pointers +//: +//: Casting a numeric value into a value that can't be represented by the target type (either directly or via static_cast) +//: +//: To guard against these, some conventions: +//: +//: 0. Initialize all primitive variables in functions and constructors. +//: +//: 1. Minimize use of pointers and pointer arithmetic. Avoid 'new' and +//: 'delete' as far as possible. Rely on STL to perform memory management to +//: avoid use-after-free issues (and memory leaks). +//: +//: 2. Avoid naked arrays to avoid out-of-bounds access. Never use operator[] +//: except with map. Use at() with STL vectors and so on. +//: +//: 3. Valgrind all the things. +//: +//: 4. Avoid unsigned numbers. Not strictly an undefined-behavior issue, but +//: the extra range doesn't matter, and it's one less confusing category of +//: interaction gotchas to worry about. +//: +//: Corollary: don't use the size() method on containers, since it returns an +//: unsigned and that'll cause warnings about mixing signed and unsigned, +//: yadda-yadda. Instead use this macro below to perform an unsafe cast to +//: signed. We'll just give up immediately if a container's ever too large. +//: Basically, Mu is not concerned about this being a little slower than it +//: could be. (https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7) +//: +//: Addendum to corollary: We're going to uniformly use int everywhere, to +//: indicate that we're oblivious to number size, and since Clang on 32-bit +//: platforms doesn't yet support multiplication over 64-bit integers, and +//: since multiplying two integers seems like a more common situation to end +//: up in than integer overflow. +:(before "End Includes") +#define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size())) + +//: 5. Integer overflow is guarded against at runtime using the -ftrapv flag +//: to the compiler, supported by Clang (GCC version only works sometimes: +//: http://stackoverflow.com/questions/20851061/how-to-make-gcc-ftrapv-work). +:(before "atexit(teardown)") +initialize_signal_handlers(); // not always necessary, but doesn't hurt +//? cerr << INT_MAX+1 << '\n'; // test overflow +//? assert(false); // test SIGABRT +:(code) +// based on https://spin.atomicobject.com/2013/01/13/exceptions-stack-traces-c +void initialize_signal_handlers() { + struct sigaction action; + bzero(&action, sizeof(action)); + action.sa_sigaction = dump_and_exit; + sigemptyset(&action.sa_mask); + sigaction(SIGABRT, &action, NULL); // assert() failure or integer overflow on linux (with -ftrapv) + sigaction(SIGILL, &action, NULL); // integer overflow on OS X (with -ftrapv) +} +void dump_and_exit(int sig, unused siginfo_t* dummy1, unused void* dummy2) { + switch (sig) { + case SIGABRT: + #ifndef __APPLE__ + cerr << "SIGABRT: might be an integer overflow if it wasn't an assert() failure\n"; + _Exit(1); + #endif + break; + case SIGILL: + #ifdef __APPLE__ + cerr << "SIGILL: most likely caused by integer overflow\n"; + _Exit(1); + #endif + break; + default: + break; + } +} +:(before "End Includes") +#include <signal.h> + +//: For good measure we'll also enable SIGFPE. +:(before "atexit(teardown)") +feenableexcept(FE_OVERFLOW | FE_UNDERFLOW); +//? assert(sizeof(int) == 4 && sizeof(float) == 4); +//? // | exp | mantissa +//? int smallest_subnormal = 0b00000000000000000000000000000001; +//? float smallest_subnormal_f = *reinterpret_cast<float*>(&smallest_subnormal); +//? cerr << "ε: " << smallest_subnormal_f << '\n'; +//? cerr << "ε/2: " << smallest_subnormal_f/2 << " (underflow)\n"; // test SIGFPE +:(before "End Includes") +#include <fenv.h> +:(code) +#ifdef __APPLE__ +// Public domain polyfill for feenableexcept on OS X +// http://www-personal.umich.edu/~williams/archive/computation/fe-handling-example.c +int feenableexcept (unsigned int excepts) { + static fenv_t fenv; + unsigned int new_excepts = excepts & FE_ALL_EXCEPT; + unsigned int old_excepts; + if (fegetenv(&fenv)) return -1; + old_excepts = fenv.__control & FE_ALL_EXCEPT; + fenv.__control &= ~new_excepts; + fenv.__mxcsr &= ~(new_excepts << 7); + return fesetenv(&fenv) ? -1 : old_excepts; +} +#endif + +//: 6. Map's operator[] being non-const is fucking evil. +:(before "Globals") // can't generate prototypes for these +// from http://stackoverflow.com/questions/152643/idiomatic-c-for-reading-from-a-const-map +template<typename T> typename T::mapped_type& get(T& map, typename T::key_type const& key) { + typename T::iterator iter(map.find(key)); + assert(iter != map.end()); + return iter->second; +} +template<typename T> typename T::mapped_type const& get(const T& map, typename T::key_type const& key) { + typename T::const_iterator iter(map.find(key)); + assert(iter != map.end()); + return iter->second; +} +template<typename T> typename T::mapped_type const& put(T& map, typename T::key_type const& key, typename T::mapped_type const& value) { + map[key] = value; + return map[key]; +} +template<typename T> bool contains_key(T& map, typename T::key_type const& key) { + return map.find(key) != map.end(); +} +template<typename T> typename T::mapped_type& get_or_insert(T& map, typename T::key_type const& key) { + return map[key]; +} +//: The contract: any container that relies on get_or_insert should never call +//: contains_key. + +//: 7. istreams are a royal pain in the arse. You have to be careful about +//: what subclass you try to putback into. You have to watch out for the pesky +//: failbit and badbit. Just avoid eof() and use this helper instead. +:(code) +bool has_data(istream& in) { + return in && !in.eof(); +} + +:(before "End Includes") +#include <assert.h> + +#include <iostream> +using std::istream; +using std::ostream; +using std::iostream; +using std::cin; +using std::cout; +using std::cerr; +#include <iomanip> + +#include <string.h> +#include <string> +using std::string; + +#define unused __attribute__((unused)) diff --git a/subx/002test.cc b/subx/002test.cc new file mode 100644 index 00000000..1d8efb08 --- /dev/null +++ b/subx/002test.cc @@ -0,0 +1,112 @@ +//: A simple test harness. To create new tests, define functions starting with +//: 'test_'. To run all tests so defined, run: +//: $ ./mu test +//: +//: Every layer should include tests, and can reach into previous layers. +//: However, it seems like a good idea never to reach into tests from previous +//: layers. Every test should be a contract that always passes as originally +//: written, regardless of any later layers. Avoid writing 'temporary' tests +//: that are only meant to work until some layer. + +:(before "End Types") +typedef void (*test_fn)(void); +:(before "Globals") +// move a global ahead into types that we can't generate an extern declaration for +const test_fn Tests[] = { + #include "test_list" // auto-generated; see 'build' script +}; + +:(before "End Globals") +bool Run_tests = false; +bool Passed = true; // set this to false inside any test to indicate failure +long Num_failures = 0; + +:(before "End Includes") +#define CHECK(X) \ + if (Passed && !(X)) { \ + cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << '\n'; \ + Passed = false; \ + return; /* Currently we stop at the very first failure. */ \ + } + +#define CHECK_EQ(X, Y) \ + if (Passed && (X) != (Y)) { \ + cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << " == " << #Y << '\n'; \ + cerr << " got " << (X) << '\n'; /* BEWARE: multiple eval */ \ + Passed = false; \ + return; /* Currently we stop at the very first failure. */ \ + } + +:(before "End Setup") +Passed = true; + +:(before "End Commandline Parsing") +if (argc > 1 && is_equal(argv[1], "test")) { + Run_tests = true; --argc; ++argv; // shift 'test' out of commandline args +} + +:(before "End Main") +if (Run_tests) { + // Test Runs + // we run some tests and then exit; assume no state need be maintained afterward + + // End Test Run Initialization + time_t t; time(&t); + cerr << "C tests: " << ctime(&t); + for (size_t i=0; i < sizeof(Tests)/sizeof(Tests[0]); ++i) { +//? cerr << "running .build/test_list line " << (i+1) << '\n'; + run_test(i); + } + cerr << '\n'; + // End Tests + if (Num_failures > 0) { + cerr << Num_failures << " failure" + << (Num_failures > 1 ? "s" : "") + << '\n'; + return 1; + } + return 0; +} + +:(code) +void run_test(size_t i) { + if (i >= sizeof(Tests)/sizeof(Tests[0])) { + cerr << "no test " << i << '\n'; + return; + } + setup(); + // End Test Setup + (*Tests[i])(); + // End Test Teardown + teardown(); + if (Passed) cerr << '.'; + else ++Num_failures; +} + +bool is_integer(const string& s) { + return s.find_first_not_of("0123456789-") == string::npos // no other characters + && s.find_first_of("0123456789") != string::npos // at least one digit + && s.find('-', 1) == string::npos; // '-' only at first position +} + +int to_integer(string n) { + char* end = NULL; + // safe because string.c_str() is guaranteed to be null-terminated + int result = strtoll(n.c_str(), &end, /*any base*/0); + if (*end != '\0') cerr << "tried to convert " << n << " to number\n"; + assert(*end == '\0'); + return result; +} + +void test_is_integer() { + CHECK(is_integer("1234")); + CHECK(is_integer("-1")); + CHECK(!is_integer("234.0")); + CHECK(is_integer("-567")); + CHECK(!is_integer("89-0")); + CHECK(!is_integer("-")); + CHECK(!is_integer("1e3")); // not supported +} + +:(before "End Includes") +#include <stdlib.h> diff --git a/subx/003trace.cc b/subx/003trace.cc new file mode 100644 index 00000000..2810b420 --- /dev/null +++ b/subx/003trace.cc @@ -0,0 +1,399 @@ +//: The goal of layers is to make programs more easy to understand and more +//: malleable, easy to rewrite in radical ways without accidentally breaking +//: some corner case. Tests further both goals. They help understandability by +//: letting one make small changes and get feedback. What if I wrote this line +//: like so? What if I removed this function call, is it really necessary? +//: Just try it, see if the tests pass. Want to explore rewriting this bit in +//: this way? Tests put many refactorings on a firmer footing. +//: +//: But the usual way we write tests seems incomplete. Refactorings tend to +//: work in the small, but don't help with changes to function boundaries. If +//: you want to extract a new function you have to manually test-drive it to +//: create tests for it. If you want to inline a function its tests are no +//: longer valid. In both cases you end up having to reorganize code as well as +//: tests, an error-prone activity. +//: +//: In response, this layer introduces the notion of *domain-driven* testing. +//: We focus on the domain of inputs the whole program needs to handle rather +//: than the correctness of individual functions. All tests invoke the program +//: in a single way: by calling run() with some input. As the program operates +//: on the input, it traces out a list of _facts_ deduced about the domain: +//: trace("label") << "fact 1: " << val; +//: +//: Tests can now check these facts: +//: :(scenario foo) +//: 34 # call run() with this input +//: +label: fact 1: 34 # 'run' should have deduced this fact +//: -label: fact 1: 35 # the trace should not contain such a fact +//: +//: Since we never call anything but the run() function directly, we never have +//: to rewrite the tests when we reorganize the internals of the program. We +//: just have to make sure our rewrite deduces the same facts about the domain, +//: and that's something we're going to have to do anyway. +//: +//: To avoid the combinatorial explosion of integration tests, each layer +//: mainly logs facts to the trace with a common *label*. All tests in a layer +//: tend to check facts with this label. Validating the facts logged with a +//: specific label is like calling functions of that layer directly. +//: +//: To build robust tests, trace facts about your domain rather than details of +//: how you computed them. +//: +//: More details: http://akkartik.name/blog/tracing-tests +//: +//: --- +//: +//: Between layers and domain-driven testing, programming starts to look like a +//: fundamentally different activity. Instead of a) superficial, b) local rules +//: on c) code [like say http://blog.bbv.ch/2013/06/05/clean-code-cheat-sheet], +//: we allow programmers to engage with the a) deep, b) global structure of the +//: c) domain. If you can systematically track discontinuities in the domain, +//: you don't care if the code used gotos as long as it passed the tests. If +//: tests become more robust to run it becomes easier to try out radically +//: different implementations for the same program. If code is super-easy to +//: rewrite, it becomes less important what indentation style it uses, or that +//: the objects are appropriately encapsulated, or that the functions are +//: referentially transparent. +//: +//: Instead of plumbing, programming becomes building and gradually refining a +//: map of the environment the program must operate under. Whether a program is +//: 'correct' at a given point in time is a red herring; what matters is +//: avoiding regression by monotonically nailing down the more 'eventful' parts +//: of the terrain. It helps readers new and old, and rewards curiosity, to +//: organize large programs in self-similar hierarchies of example scenarios +//: colocated with the code that makes them work. +//: +//: "Programming properly should be regarded as an activity by which +//: programmers form a mental model, rather than as production of a program." +//: -- Peter Naur (http://alistair.cockburn.us/ASD+book+extract%3A+%22Naur,+Ehn,+Musashi%22) + +:(before "End Types") +struct trace_line { + int depth; // optional field just to help browse traces later + string label; + string contents; + trace_line(string l, string c) :depth(0), label(l), contents(c) {} + trace_line(int d, string l, string c) :depth(d), label(l), contents(c) {} +}; + +:(before "End Globals") +bool Hide_errors = false; +bool Dump_trace = false; +string Dump_label = ""; +:(before "End Setup") +Hide_errors = false; +Dump_trace = false; +Dump_label = ""; + +:(before "End Types") +// Pre-define some global constants that trace_stream needs to know about. +// Since they're in the Types section, they'll be included in any cleaved +// compilation units. So no extern linkage. +const int Max_depth = 9999; +const int Error_depth = 0; // definitely always print errors +const int App_depth = 2; // temporarily where all Mu code will trace to + +struct trace_stream { + vector<trace_line> past_lines; + // accumulator for current line + ostringstream* curr_stream; + string curr_label; + int curr_depth; + int callstack_depth; + int collect_depth; + ofstream null_stream; // never opens a file, so writes silently fail + trace_stream() :curr_stream(NULL), curr_depth(Max_depth), callstack_depth(0), collect_depth(Max_depth) {} + ~trace_stream() { if (curr_stream) delete curr_stream; } + + ostream& stream(string label) { + return stream(Max_depth, label); + } + + ostream& stream(int depth, string label) { + if (depth > collect_depth) return null_stream; + curr_stream = new ostringstream; + curr_label = label; + curr_depth = depth; + return *curr_stream; + } + + // be sure to call this before messing with curr_stream or curr_label + void newline(); + // useful for debugging + string readable_contents(string label); // empty label = show everything +}; + +:(code) +void trace_stream::newline() { + if (!curr_stream) return; + string curr_contents = curr_stream->str(); + if (!curr_contents.empty()) { + past_lines.push_back(trace_line(curr_depth, trim(curr_label), curr_contents)); // preserve indent in contents + if ((!Hide_errors && curr_label == "error") + || Dump_trace + || (!Dump_label.empty() && curr_label == Dump_label)) + cerr << curr_label << ": " << curr_contents << '\n'; + } + delete curr_stream; + curr_stream = NULL; + curr_label.clear(); + curr_depth = Max_depth; +} + +string trace_stream::readable_contents(string label) { + ostringstream output; + label = trim(label); + for (vector<trace_line>::iterator p = past_lines.begin(); p != past_lines.end(); ++p) + if (label.empty() || label == p->label) { + output << std::setw(4) << p->depth << ' ' << p->label << ": " << p->contents << '\n'; + } + return output.str(); +} + +:(before "End Globals") +trace_stream* Trace_stream = NULL; +int Trace_errors = 0; // used only when Trace_stream is NULL + +:(before "End Includes") +#define CLEAR_TRACE delete Trace_stream, Trace_stream = new trace_stream; + +// Top-level helper. IMPORTANT: can't nest +#define trace(...) !Trace_stream ? cerr /*print nothing*/ : Trace_stream->stream(__VA_ARGS__) + +// Just for debugging; 'git log' should never show any calls to 'dbg'. +#define dbg trace(0, "a") +#define DUMP(label) if (Trace_stream) cerr << Trace_stream->readable_contents(label); + +// Errors are a special layer. +#define raise (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Error_depth, "error")) +// If we aren't yet sure how to deal with some corner case, use assert_for_now +// to indicate that it isn't an inviolable invariant. +#define assert_for_now assert + +// Inside tests, fail any tests that displayed (unexpected) errors. +// Expected errors in tests should always be hidden and silently checked for. +:(before "End Test Teardown") +if (Passed && !Hide_errors && trace_contains_errors()) { + Passed = false; +} +:(code) +bool trace_contains_errors() { + return Trace_errors > 0 || trace_count("error") > 0; +} + +:(before "End Types") +struct end {}; +:(code) +ostream& operator<<(ostream& os, unused end) { + if (Trace_stream) Trace_stream->newline(); + return os; +} + +:(before "End Globals") +bool Save_trace = false; + +// Trace_stream is a resource, lease_tracer uses RAII to manage it. +:(before "End Types") +struct lease_tracer { + lease_tracer(); + ~lease_tracer(); +}; +:(code) +lease_tracer::lease_tracer() { Trace_stream = new trace_stream; } +lease_tracer::~lease_tracer() { + if (!Trace_stream) return; // in case tests close Trace_stream + if (Save_trace) { + ofstream fout("last_trace"); + fout << Trace_stream->readable_contents(""); + fout.close(); + } + delete Trace_stream, Trace_stream = NULL; +} +:(before "End Includes") +#define START_TRACING_UNTIL_END_OF_SCOPE lease_tracer leased_tracer; +:(before "End Test Setup") +START_TRACING_UNTIL_END_OF_SCOPE + +:(before "End Includes") +#define CHECK_TRACE_CONTENTS(...) check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__) + +#define CHECK_TRACE_CONTAINS_ERRORS() CHECK(trace_contains_errors()) +#define CHECK_TRACE_DOESNT_CONTAIN_ERRORS() \ + if (Passed && trace_contains_errors()) { \ + cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected errors\n"; \ + DUMP("error"); \ + Passed = false; \ + return; \ + } + +#define CHECK_TRACE_COUNT(label, count) \ + if (Passed && trace_count(label) != (count)) { \ + cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): trace_count of " << label << " should be " << count << '\n'; \ + cerr << " got " << trace_count(label) << '\n'; /* multiple eval */ \ + DUMP(label); \ + Passed = false; \ + return; /* Currently we stop at the very first failure. */ \ + } + +#define CHECK_TRACE_DOESNT_CONTAIN(...) CHECK(trace_doesnt_contain(__VA_ARGS__)) + +:(code) +bool check_trace_contents(string FUNCTION, string FILE, int LINE, string expected) { + if (!Passed) return false; + if (!Trace_stream) return false; + vector<string> expected_lines = split(expected, ""); + int curr_expected_line = 0; + while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty()) + ++curr_expected_line; + if (curr_expected_line == SIZE(expected_lines)) return true; + string label, contents; + split_label_contents(expected_lines.at(curr_expected_line), &label, &contents); + for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { + if (label != p->label) continue; + if (contents != trim(p->contents)) continue; + ++curr_expected_line; + while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty()) + ++curr_expected_line; + if (curr_expected_line == SIZE(expected_lines)) return true; + split_label_contents(expected_lines.at(curr_expected_line), &label, &contents); + } + + if (line_exists_anywhere(label, contents)) { + cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): line [" << label << ": " << contents << "] out of order in trace:\n"; + DUMP(""); + } + else { + cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << contents << "] in trace:\n"; + DUMP(label); + } + Passed = false; + return false; +} + +void split_label_contents(const string& s, string* label, string* contents) { + static const string delim(": "); + size_t pos = s.find(delim); + if (pos == string::npos) { + *label = ""; + *contents = trim(s); + } + else { + *label = trim(s.substr(0, pos)); + *contents = trim(s.substr(pos+SIZE(delim))); + } +} + +bool line_exists_anywhere(const string& label, const string& contents) { + for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { + if (label != p->label) continue; + if (contents == trim(p->contents)) return true; + } + return false; +} + +int trace_count(string label) { + return trace_count(label, ""); +} + +int trace_count(string label, string line) { + if (!Trace_stream) return 0; + long result = 0; + for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { + if (label == p->label) { + if (line == "" || trim(line) == trim(p->contents)) + ++result; + } + } + return result; +} + +int trace_count_prefix(string label, string prefix) { + if (!Trace_stream) return 0; + long result = 0; + for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { + if (label == p->label) { + if (starts_with(trim(p->contents), trim(prefix))) + ++result; + } + } + return result; +} + +bool trace_doesnt_contain(string label, string line) { + return trace_count(label, line) == 0; +} + +bool trace_doesnt_contain(string expected) { + vector<string> tmp = split_first(expected, ": "); + return trace_doesnt_contain(tmp.at(0), tmp.at(1)); +} + +vector<string> split(string s, string delim) { + vector<string> result; + size_t begin=0, end=s.find(delim); + while (true) { + if (end == string::npos) { + result.push_back(string(s, begin, string::npos)); + break; + } + result.push_back(string(s, begin, end-begin)); + begin = end+SIZE(delim); + end = s.find(delim, begin); + } + return result; +} + +vector<string> split_first(string s, string delim) { + vector<string> result; + size_t end=s.find(delim); + result.push_back(string(s, 0, end)); + if (end != string::npos) + result.push_back(string(s, end+SIZE(delim), string::npos)); + return result; +} + +string trim(const string& s) { + string::const_iterator first = s.begin(); + while (first != s.end() && isspace(*first)) + ++first; + if (first == s.end()) return ""; + + string::const_iterator last = --s.end(); + while (last != s.begin() && isspace(*last)) + --last; + ++last; + return string(first, last); +} + +:(before "End Includes") +#include <vector> +using std::vector; +#include <list> +using std::list; +#include <map> +using std::map; +#include <set> +using std::set; +#include <algorithm> + +#include <sstream> +using std::istringstream; +using std::ostringstream; + +#include <fstream> +using std::ifstream; +using std::ofstream; + +:(before "End Globals") +//: In future layers we'll use the depth field as follows: +//: +//: Errors will be depth 0. +//: Mu 'applications' will be able to use depths 1-100 as they like. +//: Primitive statements will occupy 101-9989 +extern const int Initial_callstack_depth = 101; +extern const int Max_callstack_depth = 9989; +//: Finally, details of primitive Mu statements will occupy depth 9990-9999 +//: (more on that later as well) +//: +//: This framework should help us hide some details at each level, mixing +//: static ideas like layers with the dynamic notion of call-stack depth. diff --git a/subx/003trace.test.cc b/subx/003trace.test.cc new file mode 100644 index 00000000..67b4c345 --- /dev/null +++ b/subx/003trace.test.cc @@ -0,0 +1,124 @@ +void test_trace_check_compares() { + trace("test layer") << "foo" << end(); + CHECK_TRACE_CONTENTS("test layer: foo"); +} + +void test_trace_check_ignores_other_layers() { + trace("test layer 1") << "foo" << end(); + trace("test layer 2") << "bar" << end(); + CHECK_TRACE_CONTENTS("test layer 1: foo"); + CHECK_TRACE_DOESNT_CONTAIN("test layer 2: foo"); +} + +void test_trace_check_ignores_leading_whitespace() { + trace("test layer 1") << " foo" << end(); + CHECK_EQ(trace_count("test layer 1", /*too little whitespace*/"foo"), 1); + CHECK_EQ(trace_count("test layer 1", /*too much whitespace*/" foo"), 1); +} + +void test_trace_check_ignores_other_lines() { + trace("test layer 1") << "foo" << end(); + trace("test layer 1") << "bar" << end(); + CHECK_TRACE_CONTENTS("test layer 1: foo"); +} + +void test_trace_check_ignores_other_lines2() { + trace("test layer 1") << "foo" << end(); + trace("test layer 1") << "bar" << end(); + CHECK_TRACE_CONTENTS("test layer 1: bar"); +} + +void test_trace_ignores_trailing_whitespace() { + trace("test layer 1") << "foo\n" << end(); + CHECK_TRACE_CONTENTS("test layer 1: foo"); +} + +void test_trace_ignores_trailing_whitespace2() { + trace("test layer 1") << "foo " << end(); + CHECK_TRACE_CONTENTS("test layer 1: foo"); +} + +void test_trace_orders_across_layers() { + trace("test layer 1") << "foo" << end(); + trace("test layer 2") << "bar" << end(); + trace("test layer 1") << "qux" << end(); + CHECK_TRACE_CONTENTS("test layer 1: footest layer 2: bartest layer 1: qux"); +} + +void test_trace_supports_count() { + trace("test layer 1") << "foo" << end(); + trace("test layer 1") << "foo" << end(); + CHECK_EQ(trace_count("test layer 1", "foo"), 2); +} + +void test_trace_supports_count2() { + trace("test layer 1") << "foo" << end(); + trace("test layer 1") << "bar" << end(); + CHECK_EQ(trace_count("test layer 1"), 2); +} + +void test_trace_count_ignores_trailing_whitespace() { + trace("test layer 1") << "foo\n" << end(); + CHECK_EQ(trace_count("test layer 1", "foo"), 1); +} + +// pending: DUMP tests +// pending: readable_contents() adds newline if necessary. +// pending: raise also prints to stderr. +// pending: raise doesn't print to stderr if Hide_errors is set. +// pending: raise doesn't have to be saved if Hide_errors is set, just printed. +// pending: raise prints to stderr if Trace_stream is NULL. +// pending: raise prints to stderr if Trace_stream is NULL even if Hide_errors is set. + +// can't check trace because trace methods call 'split' + +void test_split_returns_at_least_one_elem() { + vector<string> result = split("", ","); + CHECK_EQ(result.size(), 1); + CHECK_EQ(result.at(0), ""); +} + +void test_split_returns_entire_input_when_no_delim() { + vector<string> result = split("abc", ","); + CHECK_EQ(result.size(), 1); + CHECK_EQ(result.at(0), "abc"); +} + +void test_split_works() { + vector<string> result = split("abc,def", ","); + CHECK_EQ(result.size(), 2); + CHECK_EQ(result.at(0), "abc"); + CHECK_EQ(result.at(1), "def"); +} + +void test_split_works2() { + vector<string> result = split("abc,def,ghi", ","); + CHECK_EQ(result.size(), 3); + CHECK_EQ(result.at(0), "abc"); + CHECK_EQ(result.at(1), "def"); + CHECK_EQ(result.at(2), "ghi"); +} + +void test_split_handles_multichar_delim() { + vector<string> result = split("abc,,def,,ghi", ",,"); + CHECK_EQ(result.size(), 3); + CHECK_EQ(result.at(0), "abc"); + CHECK_EQ(result.at(1), "def"); + CHECK_EQ(result.at(2), "ghi"); +} + +void test_trim() { + CHECK_EQ(trim(""), ""); + CHECK_EQ(trim(" "), ""); + CHECK_EQ(trim(" "), ""); + CHECK_EQ(trim("a"), "a"); + CHECK_EQ(trim(" a"), "a"); + CHECK_EQ(trim(" a"), "a"); + CHECK_EQ(trim(" ab"), "ab"); + CHECK_EQ(trim("a "), "a"); + CHECK_EQ(trim("a "), "a"); + CHECK_EQ(trim("ab "), "ab"); + CHECK_EQ(trim(" a "), "a"); + CHECK_EQ(trim(" a "), "a"); + CHECK_EQ(trim(" ab "), "ab"); +} diff --git a/subx/Readme.md b/subx/Readme.md new file mode 100644 index 00000000..d68cc331 --- /dev/null +++ b/subx/Readme.md @@ -0,0 +1 @@ +Bytecode interpreter for a subset of the 32-bit x86 ISA. diff --git a/subx/build b/subx/build new file mode 100755 index 00000000..f5ea62c3 --- /dev/null +++ b/subx/build @@ -0,0 +1,106 @@ +#!/bin/sh +# returns 0 on successful build or nothing to build +# non-zero exit status only on error during building +set -e # stop immediately on error + +# [0-9]*.cc -> subx.cc -> subx +# (layers) | | +# tangle $CXX + +# can also be called with a layer to only build until +# $ ./build --until 050 +UNTIL_LAYER=${2:-zzz} + +# there's two mechanisms for fast builds here: +# - if a command is quick to run, always run it but update the result only on any change +# - otherwise run it only if the output is 'older_than' the inputs +# +# avoid combining both mechanisms for a single file +# otherwise you'll see spurious messages about files being updated +# risk: a file may unnecessarily update without changes, causing unnecessary work downstream + +test "$CXX" || export CXX=c++ +test "$CC" || export CC=cc +test "$CFLAGS" || export CFLAGS="-g -O3" +export CFLAGS="$CFLAGS -Wall -Wextra -ftrapv -fno-strict-aliasing" + +# return 1 if $1 is older than _any_ of the remaining args +older_than() { + local target=$1 + shift + if [ ! -e $target ] + then +#? echo "$target doesn't exist" + echo "updating $target" >&2 + return 0 # success + fi + local f + for f in $* + do + if [ $f -nt $target ] + then + echo "updating $target" >&2 + return 0 # success + fi + done + return 1 # failure +} + +# redirect to $1, unless it's already identical +update() { + if [ ! -e $1 ] + then + cat > $1 + else + cat > $1.tmp + diff -q $1 $1.tmp >/dev/null && rm $1.tmp || mv $1.tmp $1 + fi +} + +update_cp() { + if [ ! -e $2/$1 ] + then + cp $1 $2 + elif [ $1 -nt $2/$1 ] + then + cp $1 $2 + fi +} + +noisy_cd() { + cd $1 + echo "-- `pwd`" >&2 +} + +older_than ../enumerate/enumerate ../enumerate/enumerate.cc && { + $CXX $CFLAGS ../enumerate/enumerate.cc -o ../enumerate/enumerate +} + +older_than ../tangle/tangle ../tangle/*.cc && { + noisy_cd ../tangle + { + grep -h "^struct .* {" [0-9]*.cc |sed 's/\(struct *[^ ]*\).*/\1;/' + grep -h "^typedef " [0-9]*.cc + } |update type_list + grep -h "^[^ #].*) {" [0-9]*.cc |sed 's/ {.*/;/' |update function_list + ls [0-9]*.cc |grep -v "\.test\.cc$" |sed 's/.*/#include "&"/' |update file_list + ls [0-9]*.test.cc |sed 's/.*/#include "&"/' |update test_file_list + grep -h "^[[:space:]]*void test_" [0-9]*.cc |sed 's/^\s*void \(.*\)() {$/\1,/' |update test_list + $CXX $CFLAGS boot.cc -o tangle + noisy_cd ../subx # no effect; just to show us returning to the parent directory +} + +LAYERS=$(../enumerate/enumerate --until $UNTIL_LAYER |grep '.cc$') +older_than subx.cc $LAYERS ../enumerate/enumerate ../tangle/tangle && { + # no update here; rely on 'update' calls downstream + ../tangle/tangle $LAYERS > subx.cc +} + +grep -h "^[^[:space:]#].*) {$" subx.cc |grep -v ":.*(" |sed 's/ {.*/;/' |update function_list +grep -h "^\s*void test_" subx.cc |sed 's/^\s*void \(.*\)() {.*/\1,/' |update test_list + +older_than subx subx.cc *_list && { + $CXX $CFLAGS subx.cc -o subx +} + +exit 0 diff --git a/subx/clean b/subx/clean new file mode 100755 index 00000000..6294eb56 --- /dev/null +++ b/subx/clean @@ -0,0 +1,6 @@ +#!/bin/sh + +set -v +rm -rf subx.cc subx *_list *.dSYM +test $# -gt 0 && exit 0 # convenience: 'clean top-level' to leave subsidiary tools alone +rm -rf ../enumerate/enumerate ../tangle/tangle ../tangle/*_list ../*/*.dSYM |