From 2f899b3b2c0e6798237832e5f0a423e280f568fb Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Fri, 3 Jan 2020 20:28:40 -0800 Subject: 5878 The current prototype doesn't really use floating point; drop the guardrails there. --- 001help.cc | 27 --- html/001help.cc.html | 460 ++++++++++++++++++++++++--------------------------- 2 files changed, 217 insertions(+), 270 deletions(-) diff --git a/001help.cc b/001help.cc index ff622162..de1b680f 100644 --- a/001help.cc +++ b/001help.cc @@ -207,33 +207,6 @@ void dump_and_exit(int sig, siginfo_t* /*unused*/, void* /*unused*/) { :(before "End Includes") #include <signal.h> -//: For good measure we'll also enable SIGFPE. -:(before "atexit(reset)") -feenableexcept(FE_OVERFLOW | FE_UNDERFLOW); -//? assert(sizeof(int) == 4 && sizeof(float) == 4); -//? // | exp | mantissa -//? int smallest_subnormal = 0b00000000000000000000000000000001; -//? float smallest_subnormal_f = *reinterpret_cast<float*>(&smallest_subnormal); -//? cerr << "ε: " << smallest_subnormal_f << '\n'; -//? cerr << "ε/2: " << smallest_subnormal_f/2 << " (underflow)\n"; // test SIGFPE -:(before "End Includes") -#include <fenv.h> -:(code) -#ifdef __APPLE__ -// Public domain polyfill for feenableexcept on OS X -// http://www-personal.umich.edu/~williams/archive/computation/fe-handling-example.c -int feenableexcept(unsigned int excepts) { - static fenv_t fenv; - unsigned int new_excepts = excepts & FE_ALL_EXCEPT; - unsigned int old_excepts; - if (fegetenv(&fenv)) return -1; - old_excepts = fenv.__control & FE_ALL_EXCEPT; - fenv.__control &= ~new_excepts; - fenv.__mxcsr &= ~(new_excepts << 7); - return fesetenv(&fenv) ? -1 : old_excepts; -} -#endif - //: 6. Map's operator[] being non-const is fucking evil. 
:(before "Globals") // can't generate prototypes for these // from http://stackoverflow.com/questions/152643/idiomatic-c-for-reading-from-a-const-map diff --git a/html/001help.cc.html b/html/001help.cc.html index ea40d34b..52cd0df3 100644 --- a/html/001help.cc.html +++ b/html/001help.cc.html @@ -63,7 +63,7 @@ if ('onhashchange' in window) { 2 //: This should give you a sense for what to look forward to in later layers. 3 4 :(before "End Commandline Parsing") - 5 if (argc <= 1 || is_equal(argv[1], "--help")) { + 5 if (argc <= 1 || is_equal(argv[1], "--help")) { 6 //: this is the functionality later layers will provide 7 // currently no automated tests for commandline arg parsing 8 cerr << get(Help, "usage"); @@ -74,7 +74,7 @@ if ('onhashchange' in window) { 13 //: Options always begin with '--' and are always the first arguments. An 14 //: option will never follow a non-option. 15 char** arg = &argv[1]; - 16 while (argc > 1 && starts_with(*arg, "--")) { + 16 while (argc > 1 && starts_with(*arg, "--")) { 17 if (false) 18 ; // no-op branch just so any further additions can consistently always start with 'else' 19 // End Commandline Options(*arg) @@ -83,7 +83,7 @@ if ('onhashchange' in window) { 22 --argc; ++argv; ++arg; 23 } 24 - 25 if (is_equal(argv[1], "help")) { + 25 if (is_equal(argv[1], "help")) { 26 if (argc == 2) { 27 cerr << "help on what?\n"; 28 help_contents(); @@ -91,7 +91,7 @@ if ('onhashchange' in window) { 30 } 31 string key(argv[2]); 32 // End Help Special-cases(key) - 33 if (contains_key(Help, key)) { + 33 if (contains_key(Help, key)) { 34 cerr << get(Help, key); 35 return 0; 36 } @@ -119,245 +119,219 @@ if ('onhashchange' in window) { 58 init_help(); 59 :(code) 60 void init_help() { - 61 put(Help, "usage", - 62 "Welcome to SubX, a better way to program in machine code.\n" - 63 "SubX uses a subset of the x86 instruction set. 
SubX programs will run\n" - 64 "without modification on Linux computers.\n" - 65 "It provides a better experience and better error messages than\n" - 66 "programming directly in machine code, but you have to stick to the\n" - 67 "instructions it supports.\n" - 68 "\n" - 69 "== Ways to invoke subx\n" - 70 "- Run tests:\n" - 71 " subx test\n" - 72 "- See this message:\n" - 73 " subx --help\n" - 74 "- Convert a textual SubX program into a standard ELF binary that you can\n" - 75 " run on your computer:\n" - 76 " subx translate input1.subx input2.subx ... -o <output ELF binary>\n" - 77 "- Run a SubX binary using SubX itself (for better error messages):\n" - 78 " subx run <ELF binary>\n" - 79 "\n" - 80 "== Debugging aids\n" - 81 "- Add '--trace' to any of these commands to save a trace to disk at the end.\n" - 82 " This can run out of memory for long-running commands.\n" - 83 "- Add '--debug' to add information to traces. 'subx --debug translate' will\n" - 84 " save metadata to disk that 'subx --debug --trace run' uses to make traces\n" - 85 " more informative.\n" - 86 "- Add '--dump --trace' to emit a trace incrementally to stderr.\n" - 87 " This approach will work even for long-running programs.\n" - 88 " (Though the combination of flags is counter-intuitive and can probably\n" - 89 " be improved.)\n" - 90 "\n" - 91 "Options starting with '--' must always come before any other arguments.\n" - 92 "\n" - 93 "To start learning how to write SubX programs, see Readme.md (particularly\n" - 94 "the section on the x86 instruction set) and then run:\n" - 95 " subx help\n" - 96 ); - 97 // End Help Texts - 98 } - 99 -100 :(code) -101 bool is_equal(char* s, const char* lit) { -102 return strncmp(s, lit, strlen(lit)) == 0; -103 } -104 -105 bool starts_with(const string& s, const string& pat) { -106 string::const_iterator a=s.begin(), b=pat.begin(); -107 for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) -108 if (*a != *b) return false; -109 return b == pat.end(); -110 } -111 
-112 //: I'll throw some style conventions here for want of a better place for them. -113 //: As a rule I hate style guides. Do what you want, that's my motto. But since -114 //: we're dealing with C/C++, the one big thing we want to avoid is undefined -115 //: behavior. If a compiler ever encounters undefined behavior it can make -116 //: your program do anything it wants. -117 //: -118 //: For reference, my checklist of undefined behaviors to watch out for: -119 //: out-of-bounds access -120 //: uninitialized variables -121 //: use after free -122 //: dereferencing invalid pointers: null, a new of size 0, others -123 //: -124 //: casting a large number to a type too small to hold it -125 //: -126 //: integer overflow -127 //: division by zero and other undefined expressions -128 //: left-shift by negative count -129 //: shifting values by more than or equal to the number of bits they contain -130 //: bitwise operations on signed numbers -131 //: -132 //: Converting pointers to types of different alignment requirements -133 //: T* -> void* -> T*: defined -134 //: T* -> U* -> T*: defined if non-function pointers and alignment requirements are same -135 //: function pointers may be cast to other function pointers -136 //: -137 //: Casting a numeric value into a value that can't be represented by the target type (either directly or via static_cast) -138 //: -139 //: To guard against these, some conventions: -140 //: -141 //: 0. Initialize all primitive variables in functions and constructors. -142 //: -143 //: 1. Minimize use of pointers and pointer arithmetic. Avoid 'new' and -144 //: 'delete' as far as possible. Rely on STL to perform memory management to -145 //: avoid use-after-free issues (and memory leaks). -146 //: -147 //: 2. Avoid naked arrays to avoid out-of-bounds access. Never use operator[] -148 //: except with map. Use at() with STL vectors and so on. -149 //: -150 //: 3. Valgrind all the things. -151 //: -152 //: 4. Avoid unsigned numbers. 
Not strictly an undefined-behavior issue, but -153 //: the extra range doesn't matter, and it's one less confusing category of -154 //: interaction gotchas to worry about. -155 //: -156 //: Corollary: don't use the size() method on containers, since it returns an -157 //: unsigned and that'll cause warnings about mixing signed and unsigned, -158 //: yadda-yadda. Instead use this macro below to perform an unsafe cast to -159 //: signed. We'll just give up immediately if a container's ever too large. -160 //: Basically, Mu is not concerned about this being a little slower than it -161 //: could be. (https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7) -162 //: -163 //: Addendum to corollary: We're going to uniformly use int everywhere, to -164 //: indicate that we're oblivious to number size, and since Clang on 32-bit -165 //: platforms doesn't yet support multiplication over 64-bit integers, and -166 //: since multiplying two integers seems like a more common situation to end -167 //: up in than integer overflow. -168 :(before "End Includes") -169 #define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size())) -170 -171 //: 5. Integer overflow is guarded against at runtime using the -ftrapv flag -172 //: to the compiler, supported by Clang (GCC version only works sometimes: -173 //: http://stackoverflow.com/questions/20851061/how-to-make-gcc-ftrapv-work). -174 :(before "atexit(reset)") -175 initialize_signal_handlers(); // not always necessary, but doesn't hurt -176 //? cerr << INT_MAX+1 << '\n'; // test overflow -177 //? 
assert(false); // test SIGABRT -178 :(code) -179 // based on https://spin.atomicobject.com/2013/01/13/exceptions-stack-traces-c -180 void initialize_signal_handlers() { -181 struct sigaction action; -182 bzero(&action, sizeof(action)); -183 action.sa_sigaction = dump_and_exit; -184 sigemptyset(&action.sa_mask); -185 sigaction(SIGABRT, &action, NULL); // assert() failure or integer overflow on linux (with -ftrapv) -186 sigaction(SIGILL, &action, NULL); // integer overflow on OS X (with -ftrapv) -187 } -188 void dump_and_exit(int sig, siginfo_t* /*unused*/, void* /*unused*/) { -189 switch (sig) { -190 case SIGABRT: -191 #ifndef __APPLE__ -192 cerr << "SIGABRT: might be an integer overflow if it wasn't an assert() failure\n"; -193 _Exit(1); -194 #endif -195 break; -196 case SIGILL: -197 #ifdef __APPLE__ -198 cerr << "SIGILL: most likely caused by integer overflow\n"; -199 _Exit(1); -200 #endif -201 break; -202 default: -203 break; -204 } -205 } -206 :(before "End Includes") -207 #include <signal.h> -208 -209 //: For good measure we'll also enable SIGFPE. -210 :(before "atexit(reset)") -211 feenableexcept(FE_OVERFLOW | FE_UNDERFLOW); -212 //? assert(sizeof(int) == 4 && sizeof(float) == 4); -213 //? // | exp | mantissa -214 //? int smallest_subnormal = 0b00000000000000000000000000000001; -215 //? float smallest_subnormal_f = *reinterpret_cast<float*>(&smallest_subnormal); -216 //? cerr << "ε: " << smallest_subnormal_f << '\n'; -217 //? 
cerr << "ε/2: " << smallest_subnormal_f/2 << " (underflow)\n"; // test SIGFPE -218 :(before "End Includes") -219 #include <fenv.h> -220 :(code) -221 #ifdef __APPLE__ -222 // Public domain polyfill for feenableexcept on OS X -223 // http://www-personal.umich.edu/~williams/archive/computation/fe-handling-example.c -224 int feenableexcept(unsigned int excepts) { -225 static fenv_t fenv; -226 unsigned int new_excepts = excepts & FE_ALL_EXCEPT; -227 unsigned int old_excepts; -228 if (fegetenv(&fenv)) return -1; -229 old_excepts = fenv.__control & FE_ALL_EXCEPT; -230 fenv.__control &= ~new_excepts; -231 fenv.__mxcsr &= ~(new_excepts << 7); -232 return fesetenv(&fenv) ? -1 : old_excepts; -233 } -234 #endif -235 -236 //: 6. Map's operator[] being non-const is fucking evil. -237 :(before "Globals") // can't generate prototypes for these -238 // from http://stackoverflow.com/questions/152643/idiomatic-c-for-reading-from-a-const-map -239 template<typename T> typename T::mapped_type& get(T& map, typename T::key_type const& key) { -240 typename T::iterator iter(map.find(key)); -241 if (iter == map.end()) { -242 cerr << "get couldn't find key '" << key << "'\n"; -243 assert(iter != map.end()); -244 } -245 return iter->second; -246 } -247 template<typename T> typename T::mapped_type const& get(const T& map, typename T::key_type const& key) { -248 typename T::const_iterator iter(map.find(key)); -249 if (iter == map.end()) { -250 cerr << "get couldn't find key '" << key << "'\n"; -251 assert(iter != map.end()); -252 } -253 return iter->second; -254 } -255 template<typename T> typename T::mapped_type const& put(T& map, typename T::key_type const& key, typename T::mapped_type const& value) { -256 map[key] = value; -257 return map[key]; -258 } -259 template<typename T> bool contains_key(T& map, typename T::key_type const& key) { -260 return map.find(key) != map.end(); -261 } -262 template<typename T> typename T::mapped_type& get_or_insert(T& map, typename T::key_type const& key) { 
-263 return map[key]; -264 } -265 template<typename T> typename T::mapped_type const& put_new(T& map, typename T::key_type const& key, typename T::mapped_type const& value) { -266 assert(map.find(key) == map.end()); -267 map[key] = value; -268 return map[key]; -269 } -270 //: The contract: any container that relies on get_or_insert should never call -271 //: contains_key. -272 -273 //: 7. istreams are a royal pain in the arse. You have to be careful about -274 //: what subclass you try to putback into. You have to watch out for the pesky -275 //: failbit and badbit. Just avoid eof() and use this helper instead. -276 :(code) -277 bool has_data(istream& in) { -278 return in && !in.eof(); -279 } -280 -281 :(before "End Includes") -282 #include <assert.h> -283 -284 #include <iostream> -285 using std::istream; -286 using std::ostream; -287 using std::iostream; -288 using std::cin; -289 using std::cout; -290 using std::cerr; -291 #include <iomanip> -292 -293 #include <string.h> -294 #include <string> -295 using std::string; -296 -297 #include <algorithm> -298 using std::min; -299 using std::max; + 61 put(Help, "usage", + 62 "bootstrap: the bootstrap translator for SubX.\n" + 63 "This program also wraps some miscellaneous useful functionality:\n" + 64 " - an x86 emulator: `bootstrap run`\n" + 65 " - online help: `bootstrap help`\n" + 66 "\n" + 67 "== Ways to invoke bootstrap\n" + 68 "- See this message:\n" + 69 " bootstrap --help\n" + 70 "- Convert a textual SubX program into a standard ELF binary that you can\n" + 71 " run on your computer:\n" + 72 " bootstrap translate input1.subx input2.subx ... -o <output ELF binary>\n" + 73 "- Run a SubX binary using SubX itself (for better error messages):\n" + 74 " bootstrap run <ELF binary>\n" + 75 "- Run all bootstrap's unit tests:\n" + 76 " bootstrap test\n" + 77 "- Run a single unit test:\n" + 78 " bootstrap test <test name>\n" + 79 " e.g. 
bootstrap test test_copy_imm32_to_EAX\n" + 80 "\n" + 81 "== Debugging aids\n" + 82 "- Add '--trace' to any of these commands to save a trace to disk at the end.\n" + 83 " This can run out of memory for long-running commands.\n" + 84 "- Add '--debug' to add information to traces. 'bootstrap --debug translate'\n" + 85 " will save metadata to disk that 'bootstrap --trace run' uses to make traces\n" + 86 " more informative.\n" + 87 "- Add '--dump --trace' to emit a trace incrementally to stderr.\n" + 88 " This approach will work even for long-running programs.\n" + 89 " (Though the combination of flags is counter-intuitive and can probably\n" + 90 " be improved.)\n" + 91 "\n" + 92 "Options starting with '--' must always come before any other arguments.\n" + 93 "\n" + 94 "To start learning how to write SubX programs, see Readme.md (particularly\n" + 95 "the section on the x86 instruction set) and then run:\n" + 96 " bootstrap help\n" + 97 ); + 98 // End Help Texts + 99 } +100 +101 :(code) +102 bool is_equal(char* s, const char* lit) { +103 return strncmp(s, lit, strlen(lit)) == 0; +104 } +105 +106 bool starts_with(const string& s, const string& pat) { +107 string::const_iterator a=s.begin(), b=pat.begin(); +108 for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) +109 if (*a != *b) return false; +110 return b == pat.end(); +111 } +112 +113 //: I'll throw some style conventions here for want of a better place for them. +114 //: As a rule I hate style guides. Do what you want, that's my motto. But since +115 //: we're dealing with C/C++, the one big thing we want to avoid is undefined +116 //: behavior. If a compiler ever encounters undefined behavior it can make +117 //: your program do anything it wants. 
+118 //: +119 //: For reference, my checklist of undefined behaviors to watch out for: +120 //: out-of-bounds access +121 //: uninitialized variables +122 //: use after free +123 //: dereferencing invalid pointers: null, a new of size 0, others +124 //: +125 //: casting a large number to a type too small to hold it +126 //: +127 //: integer overflow +128 //: division by zero and other undefined expressions +129 //: left-shift by negative count +130 //: shifting values by more than or equal to the number of bits they contain +131 //: bitwise operations on signed numbers +132 //: +133 //: Converting pointers to types of different alignment requirements +134 //: T* -> void* -> T*: defined +135 //: T* -> U* -> T*: defined if non-function pointers and alignment requirements are same +136 //: function pointers may be cast to other function pointers +137 //: +138 //: Casting a numeric value into a value that can't be represented by the target type (either directly or via static_cast) +139 //: +140 //: To guard against these, some conventions: +141 //: +142 //: 0. Initialize all primitive variables in functions and constructors. +143 //: +144 //: 1. Minimize use of pointers and pointer arithmetic. Avoid 'new' and +145 //: 'delete' as far as possible. Rely on STL to perform memory management to +146 //: avoid use-after-free issues (and memory leaks). +147 //: +148 //: 2. Avoid naked arrays to avoid out-of-bounds access. Never use operator[] +149 //: except with map. Use at() with STL vectors and so on. +150 //: +151 //: 3. Valgrind all the things. +152 //: +153 //: 4. Avoid unsigned numbers. Not strictly an undefined-behavior issue, but +154 //: the extra range doesn't matter, and it's one less confusing category of +155 //: interaction gotchas to worry about. +156 //: +157 //: Corollary: don't use the size() method on containers, since it returns an +158 //: unsigned and that'll cause warnings about mixing signed and unsigned, +159 //: yadda-yadda. 
Instead use this macro below to perform an unsafe cast to +160 //: signed. We'll just give up immediately if a container's ever too large. +161 //: Basically, Mu is not concerned about this being a little slower than it +162 //: could be. (https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7) +163 //: +164 //: Addendum to corollary: We're going to uniformly use int everywhere, to +165 //: indicate that we're oblivious to number size, and since Clang on 32-bit +166 //: platforms doesn't yet support multiplication over 64-bit integers, and +167 //: since multiplying two integers seems like a more common situation to end +168 //: up in than integer overflow. +169 :(before "End Includes") +170 #define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size())) +171 +172 //: 5. Integer overflow is guarded against at runtime using the -ftrapv flag +173 //: to the compiler, supported by Clang (GCC version only works sometimes: +174 //: http://stackoverflow.com/questions/20851061/how-to-make-gcc-ftrapv-work). +175 :(before "atexit(reset)") +176 initialize_signal_handlers(); // not always necessary, but doesn't hurt +177 //? cerr << INT_MAX+1 << '\n'; // test overflow +178 //? 
assert(false); // test SIGABRT +179 :(code) +180 // based on https://spin.atomicobject.com/2013/01/13/exceptions-stack-traces-c +181 void initialize_signal_handlers() { +182 struct sigaction action; +183 bzero(&action, sizeof(action)); +184 action.sa_sigaction = dump_and_exit; +185 sigemptyset(&action.sa_mask); +186 sigaction(SIGABRT, &action, NULL); // assert() failure or integer overflow on linux (with -ftrapv) +187 sigaction(SIGILL, &action, NULL); // integer overflow on OS X (with -ftrapv) +188 } +189 void dump_and_exit(int sig, siginfo_t* /*unused*/, void* /*unused*/) { +190 switch (sig) { +191 case SIGABRT: +192 #ifndef __APPLE__ +193 cerr << "SIGABRT: might be an integer overflow if it wasn't an assert() failure\n"; +194 _Exit(1); +195 #endif +196 break; +197 case SIGILL: +198 #ifdef __APPLE__ +199 cerr << "SIGILL: most likely caused by integer overflow\n"; +200 _Exit(1); +201 #endif +202 break; +203 default: +204 break; +205 } +206 } +207 :(before "End Includes") +208 #include <signal.h> +209 +210 //: 6. Map's operator[] being non-const is fucking evil. 
+211 :(before "Globals") // can't generate prototypes for these +212 // from http://stackoverflow.com/questions/152643/idiomatic-c-for-reading-from-a-const-map +213 template<typename T> typename T::mapped_type& get(T& map, typename T::key_type const& key) { +214 typename T::iterator iter(map.find(key)); +215 if (iter == map.end()) { +216 cerr << "get couldn't find key '" << key << "'\n"; +217 assert(iter != map.end()); +218 } +219 return iter->second; +220 } +221 template<typename T> typename T::mapped_type const& get(const T& map, typename T::key_type const& key) { +222 typename T::const_iterator iter(map.find(key)); +223 if (iter == map.end()) { +224 cerr << "get couldn't find key '" << key << "'\n"; +225 assert(iter != map.end()); +226 } +227 return iter->second; +228 } +229 template<typename T> typename T::mapped_type const& put(T& map, typename T::key_type const& key, typename T::mapped_type const& value) { +230 map[key] = value; +231 return map[key]; +232 } +233 template<typename T> bool contains_key(T& map, typename T::key_type const& key) { +234 return map.find(key) != map.end(); +235 } +236 template<typename T> typename T::mapped_type& get_or_insert(T& map, typename T::key_type const& key) { +237 return map[key]; +238 } +239 template<typename T> typename T::mapped_type const& put_new(T& map, typename T::key_type const& key, typename T::mapped_type const& value) { +240 assert(map.find(key) == map.end()); +241 map[key] = value; +242 return map[key]; +243 } +244 //: The contract: any container that relies on get_or_insert should never call +245 //: contains_key. +246 +247 //: 7. istreams are a royal pain in the arse. You have to be careful about +248 //: what subclass you try to putback into. You have to watch out for the pesky +249 //: failbit and badbit. Just avoid eof() and use this helper instead. 
+250 :(code) +251 bool has_data(istream& in) { +252 return in && !in.eof(); +253 } +254 +255 :(before "End Includes") +256 #include <assert.h> +257 +258 #include <iostream> +259 using std::istream; +260 using std::ostream; +261 using std::iostream; +262 using std::cin; +263 using std::cout; +264 using std::cerr; +265 #include <iomanip> +266 +267 #include <string.h> +268 #include <string> +269 using std::string; +270 +271 #include <algorithm> +272 using std::min; +273 using std::max; -- cgit 1.4.1-2-gfad0