1 //: Everything this project/binary supports.
  2 //: This should give you a sense for what to look forward to in later layers.
  3 
  4 :(before "End Commandline Parsing")
  5 if (argc <= 1 || is_equal(argv[1], "--help")) {
  6   //: this is the functionality later layers will provide
  7   // currently no automated tests for commandline arg parsing
  8   cerr << get(Help, "usage");
  9   return 0;
 10 }
 11 
 12 //: Support for option parsing.
 13 //: Options always begin with '--' and are always the first arguments. An
 14 //: option will never follow a non-option.
 15 char** arg = &argv[1];
 16 while (argc > 1 && starts_with(*arg, "--")) {
 17   if (false)
 18     ;  // no-op branch just so any further additions can consistently always start with 'else'
 19   // End Commandline Options(*arg)
 20   else
 21     cerr << "skipping unknown option " << *arg << '\n';
 22   --argc;  ++argv;  ++arg;
 23 }
 24 
 25 if (is_equal(argv[1], "help")) {
 26   if (argc == 2) {
 27     cerr << "help on what?\n";
 28     help_contents();
 29     return 0;
 30   }
 31   string key(argv[2]);
 32   // End Help Special-cases(key)
 33   if (contains_key(Help, key)) {
 34     cerr << get(Help, key);
 35     return 0;
 36   }
 37   else {
 38     cerr << "No help found for '" << key << "'\n";
 39     help_contents();
 40     cerr << "Please check your command for typos.\n";
 41     return 1;
 42   }
 43 }
 44 
 45 :(code)
 46 void help_contents() {
 47   cerr << "Available top-level topics:\n";
 48   cerr << "  usage\n";
 49   // End Help Contents
 50 }
 51 
 52 :(before "End Globals")
 53 map<string, string> Help;
 54 :(before "End Includes")
 55 #include <map>
 56 using std::map;
 57 :(before "End One-time Setup")
 58 init_help();
 59 :(code)
 60 void init_help() {
 61   put(Help, "usage",
 62     "Welcome to SubX, a better way to program in machine code.\n"
 63     "SubX uses a subset of the x86 instruction set. SubX programs will run\n"
 64     "without modification on Linux computers.\n"
 65     "It provides a better experience and better error messages than\n"
 66     "programming directly in machine code, but you have to stick to the\n"
 67     "instructions it supports.\n"
 68     "\n"
 69     "== Ways to invoke subx\n"
 70     "- Run tests:\n"
 71     "    subx test\n"
 72     "- See this message:\n"
 73     "    subx --help\n"
 74     "- Convert a textual SubX program into a standard ELF binary that you can\n"
 75     "  run on your computer:\n"
 76     "    subx translate input1.subx intput2.subx ... -o <output ELF binary>\n"
 77     "- Run a SubX binary using SubX itself (for better error messages):\n"
 78     "    subx run <ELF binary>\n"
 79     "Add '--trace' to any of these commands to also emit a trace, for debugging purposes.\n"
 80     "However, options starting with '--' must always come before any other arguments.\n"
 81     "\n"
 82     "To start learning how to write SubX programs, run:\n"
 83     "  subx help\n"
 84   );
 85   // End Help Texts
 86 }
 87 
 88 :(code)
 89 bool is_equal(char* s, const char* lit) {
 90   return strncmp(s, lit, strlen(lit)) == 0;
 91 }
 92 
 93 bool starts_with(const string& s, const string& pat) {
 94   string::const_iterator a=s.begin(), b=pat.begin();
 95   for (/*nada*/;  a!=s.end() && b!=pat.end();  ++a, ++b)
 96     if (*a != *b) return false;
 97   return b == pat.end();
 98 }
 99 
100 //: I'll throw some style conventions here for want of a better place for them.
101 //: As a rule I hate style guides. Do what you want, that's my motto. But since
102 //: we're dealing with C/C++, the one big thing we want to avoid is undefined
103 //: behavior. If a compiler ever encounters undefined behavior it can make
104 //: your program do anything it wants.
105 //:
106 //: For reference, my checklist of undefined behaviors to watch out for:
107 //:   out-of-bounds access
108 //:   uninitialized variables
109 //:   use after free
110 //:   dereferencing invalid pointers: null, a new of size 0, others
111 //:
112 //:   casting a large number to a type too small to hold it
113 //:
114 //:   integer overflow
115 //:   division by zero and other undefined expressions
116 //:   left-shift by negative count
117 //:   shifting values by more than or equal to the number of bits they contain
118 //:   bitwise operations on signed numbers
119 //:
120 //:   Converting pointers to types of different alignment requirements
121 //:     T* -> void* -> T*: defined
122 //:     T* -> U* -> T*: defined if non-function pointers and alignment requirements are same
123 //:     function pointers may be cast to other function pointers
124 //:
125 //:       Casting a numeric value into a value that can't be represented by the target type (either directly or via static_cast)
126 //:
127 //: To guard against these, some conventions:
128 //:
129 //: 0. Initialize all primitive variables in functions and constructors.
130 //:
131 //: 1. Minimize use of pointers and pointer arithmetic. Avoid 'new' and
132 //: 'delete' as far as possible. Rely on STL to perform memory management to
133 //: avoid use-after-free issues (and memory leaks).
134 //:
135 //: 2. Avoid naked arrays to avoid out-of-bounds access. Never use operator[]
136 //: except with map. Use at() with STL vectors and so on.
137 //:
138 //: 3. Valgrind all the things.
139 //:
140 //: 4. Avoid unsigned numbers. Not strictly an undefined-behavior issue, but
141 //: the extra range doesn't matter, and it's one less confusing category of
142 //: interaction gotchas to worry about.
143 //:
144 //: Corollary: don't use the size() method on containers, since it returns an
145 //: unsigned and that'll cause warnings about mixing signed and unsigned,
146 //: yadda-yadda. Instead use this macro below to perform an unsafe cast to
147 //: signed. We'll just give up immediately if a container's ever too large.
148 //: Basically, Mu is not concerned about this being a little slower than it
149 //: could be. (https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7)
150 //:
151 //: Addendum to corollary: We're going to uniformly use int everywhere, to
152 //: indicate that we're oblivious to number size, and since Clang on 32-bit
153 //: platforms doesn't yet support multiplication over 64-bit integers, and
154 //: since multiplying two integers seems like a more common situation to end
155 //: up in than integer overflow.
156 :(before "End Includes")
//: SIZE(X): X.size() converted to a signed int. Asserts that the size is
//: below 2^(bits in int - 2) before casting. Note that X is evaluated twice
//: (once when assertions are compiled out).
#define SIZE(X) \
  (assert((X).size() < (1LL<<(sizeof(int)*8-2))), \
   static_cast<int>((X).size()))
158 
159 //: 5. Integer overflow is guarded against at runtime using the -ftrapv flag
160 //: to the compiler, supported by Clang (GCC version only works sometimes:
161 //: http://stackoverflow.com/questions/20851061/how-to-make-gcc-ftrapv-work).
162 :(before "atexit(reset)")
163 initialize_signal_handlers();  // not always necessary, but doesn't hurt
164 //? cerr << INT_MAX+1 << '\n';  // test overflow
165 //? assert(false);  // test SIGABRT
166 :(code)
167 // based on https://spin.atomicobject.com/2013/01/13/exceptions-stack-traces-c
168 void initialize_signal_handlers() {
169   struct sigaction action;
170   bzero(&action, sizeof(action));
171   action.sa_sigaction = dump_and_exit;
172   sigemptyset(&action.sa_mask);
173   sigaction(SIGABRT, &action, NULL);  // assert() failure or integer overflow on linux (with -ftrapv)
174   sigaction(SIGILL,  &action, NULL);  // integer overflow on OS X (with -ftrapv)
175 }
//: Signal handler: print a hint about the likely cause and exit with status 1.
//: Only one signal is acted on per platform (SIGILL on Apple, SIGABRT
//: elsewhere); for any other signal the handler simply returns.
//: NOTE(review): writing to cerr from a signal handler is not
//: async-signal-safe -- confirm this is acceptable here.
void dump_and_exit(int sig, siginfo_t* /*unused*/, void* /*unused*/) {
#ifdef __APPLE__
  if (sig == SIGILL) {
    cerr << "SIGILL: most likely caused by integer overflow\n";
    _Exit(1);
  }
#else
  if (sig == SIGABRT) {
    cerr << "SIGABRT: might be an integer overflow if it wasn't an assert() failure\n";
    _Exit(1);
  }
#endif
}
194 :(before "End Includes")
195 #include <signal.h>
196 
197 //: For good measure we'll also enable SIGFPE.
198 :(before "atexit(reset)")
199 feenableexcept(FE_OVERFLOW | FE_UNDERFLOW);
200 //? assert(sizeof(int) == 4 && sizeof(float) == 4);
201 //? //                          | exp   |  mantissa
202 //? int smallest_subnormal = 0b00000000000000000000000000000001;
203 //? float smallest_subnormal_f = *reinterpret_cast<float*>(&smallest_subnormal);
204 //? cerr << "ε: " << smallest_subnormal_f << '\n';
205 //? cerr << "ε/2: " << smallest_subnormal_f/2 << " (underflow)\n";  // test SIGFPE
206 :(before "End Includes")
207 #include <fenv.h>
208 :(code)
#ifdef __APPLE__
// Public domain polyfill for feenableexcept on OS X
// http://www-personal.umich.edu/~williams/archive/computation/fe-handling-example.c
//
// Clears the bits for 'excepts' in both control fields of the floating-point
// environment (presumably unmasking those exceptions so they trap -- confirm
// against the platform docs). Returns the previous FE_ALL_EXCEPT bits of
// __control, or -1 if the environment can't be read or written.
int feenableexcept(unsigned int excepts) {
  static fenv_t fenv;
  if (fegetenv(&fenv)) return -1;
  const unsigned int requested = excepts & FE_ALL_EXCEPT;
  const unsigned int previous = fenv.__control & FE_ALL_EXCEPT;
  fenv.__control &= ~requested;
  fenv.__mxcsr &= ~(requested << 7);
  if (fesetenv(&fenv)) return -1;
  return previous;
}
#endif
223 
224 //: 6. Map's operator[] being non-const is fucking evil.
225 :(before "Globals")  // can't generate prototypes for these
226 // from http://stackoverflow.com/questions/152643/idiomatic-c-for-reading-from-a-const-map
//: Look up 'key' in a mutable map and return a reference to its value.
//: A missing key is reported on stderr and then trips an assertion; nothing
//: is ever inserted.
template<typename T> typename T::mapped_type& get(T& map, typename T::key_type const& key) {
  typename T::iterator iter = map.find(key);
  if (iter != map.end()) return iter->second;
  // missing key: say which one before the assertion fires
  cerr << "get couldn't find key '" << key << "'\n";
  assert(iter != map.end());
  return iter->second;
}
//: Look up 'key' in a const map and return a const reference to its value.
//: A missing key is reported on stderr and then trips an assertion.
template<typename T> typename T::mapped_type const& get(const T& map, typename T::key_type const& key) {
  typename T::const_iterator iter = map.find(key);
  if (iter != map.end()) return iter->second;
  // missing key: say which one before the assertion fires
  cerr << "get couldn't find key '" << key << "'\n";
  assert(iter != map.end());
  return iter->second;
}
//: Store 'value' under 'key', overwriting any existing entry, and return a
//: const reference to the stored value.
template<typename T> typename T::mapped_type const& put(T& map, typename T::key_type const& key, typename T::mapped_type const& value) {
  typename T::mapped_type& slot = map[key];
  slot = value;
  return slot;
}
//: Is there an entry for 'key'? Never inserts anything.
template<typename T> bool contains_key(T& map, typename T::key_type const& key) {
  if (map.find(key) == map.end()) return false;
  return true;
}
//: Return a reference to the value stored under 'key', first creating a
//: default-constructed entry if there isn't one yet.
template<typename T> typename T::mapped_type& get_or_insert(T& map, typename T::key_type const& key) {
  typename T::mapped_type& slot = map[key];
  return slot;
}
//: Like put(), but asserts that 'key' is not already present.
template<typename T> typename T::mapped_type const& put_new(T& map, typename T::key_type const& key, typename T::mapped_type const& value) {
  assert(map.find(key) == map.end());
  typename T::mapped_type& slot = map[key];
  slot = value;
  return slot;
}
258 //: The contract: any container that relies on get_or_insert should never call
259 //: contains_key.
260 
261 //: 7. istreams are a royal pain in the arse. You have to be careful about
262 //: what subclass you try to putback into. You have to watch out for the pesky
263 //: failbit and badbit. Just avoid eof() and use this helper instead.
264 :(code)
//: True while the stream is still usable for reading: no failbit, no badbit
//: and no eofbit set.
bool has_data(istream& in) {
  // good() means no state bit is set at all, i.e. (in && !in.eof())
  return in.good();
}
268 
269 :(before "End Includes")
270 #include <assert.h>
271 
272 #include <iostream>
273 using std::istream;
274 using std::ostream;
275 using std::iostream;
276 using std::cin;
277 using std::cout;
278 using std::cerr;
279 #include <iomanip>
280 
281 #include <string.h>
282 #include <string>
283 using std::string;
284 
285 #include <algorithm>
286 using std::min;
287 using std::max;