Mu - 034address.cc

https://github.com/akkartik/mu/blob/master/034address.cc
  1 //: Addresses help us spend less time copying data around.
  2 
  3 //: So far we've been operating on primitives like numbers and characters, and
  4 //: we've started combining these primitives together into larger logical
  5 //: units (containers or arrays) that may contain many different primitives at
  6 //: once. Containers and arrays can grow quite large in complex programs, and
  7 //: we'd like some way to efficiently share them between recipes without
  8 //: constantly having to make copies. Right now 'next-ingredient' and 'return'
  9 //: copy data across recipe boundaries. To avoid copying large quantities of
 10 //: data around, we'll use *addresses*. An address is a bookmark to some
 11 //: arbitrary quantity of data (the *payload*). It's a primitive, so it's as
 12 //: efficient to copy as a number. To read or modify the payload 'pointed to'
 13 //: by an address, we'll perform a *lookup*.
 14 //:
 15 //: The notion of 'lookup' isn't an instruction like 'add' or 'subtract'.
 16 //: Instead it's an operation that can be performed when reading any of the
 17 //: ingredients of an instruction, and when writing to any of the products. To
 18 //: write to the payload of an ingredient rather than its value, simply add
 19 //: the /lookup property to it. Modern computers provide efficient support for
 20 //: addresses and lookups, making this a realistic feature.
 21 //:
 22 //: To create addresses and allocate memory exclusively for their use, use
 23 //: 'new'. Memory is a finite resource so if the computer can't satisfy your
 24 //: request, 'new' may return a 0 (null) address.
 25 //:
 26 //: Computers these days have lots of memory so in practice we can often
 27 //: assume we'll never run out. If you start running out however, say in a
 28 //: long-running program, you'll need to switch mental gears and start
 29 //: husbanding our memory more carefully. The most important tool to avoid
 30 //: wasting memory is to 'abandon' an address when you don't need it anymore.
 31 //: That frees up the memory allocated to it to be reused in future calls to
 32 //: 'new'.
 33 
 34 //: Since memory can be reused multiple times, it can happen that you have a
 35 //: stale copy to an address that has since been abandoned and reused. Using
 36 //: the stale address is almost never safe, but it can be very hard to track
 37 //: down such copies because any errors caused by them may occur even millions
 38 //: of instructions after the copy or abandon instruction. To help track down
 39 //: such issues, Mu tracks an 'alloc id' for each allocation it makes. The
 40 //: first call to 'new' has an alloc id of 1, the second gets 2, and so on.
 41 //: The alloc id is never reused.
 42 :(before "End Globals")
 43 long long Next_alloc_id = 0;
 44 :(before "End Reset")
 45 Next_alloc_id = 0;
 46 
 47 //: The 'new' instruction records alloc ids both in the memory being allocated
 48 //: and *also* in the address. The 'abandon' instruction clears alloc ids in
 49 //: both places as well. Tracking alloc ids in this manner allows us to raise
 50 //: errors about stale addresses much earlier: 'lookup' operations always
 51 //: compare alloc ids between the address and its payload.
 52 
 53 //: todo: give 'new' a custodian ingredient. Following malloc/free is a temporary hack.
 54 
 55 :(scenario new)
 56 # call 'new' two times with identical types without modifying the results; you
 57 # should get back different results
 58 def main [
 59   10:&:num <- new num:type
 60   12:&:num <- new num:type
 61   20:bool <- equal 10:&:num, 12:&:num
 62 ]
 63 +mem: storing 1000 in location 11
 64 +mem: storing 0 in location 20
 65 
 66 :(scenario new_array)
 67 # call 'new' with a second ingredient to allocate an array of some type rather than a single copy
 68 def main [
 69   10:&:@:num <- new num:type, 5
 70   12:&:num <- new num:type
 71   20:num/alloc2, 21:num/alloc1 <- deaddress 10:&:@:num, 12:&:num
 72   30:num <- subtract 21:num/alloc2, 20:num/alloc1
 73 ]
 74 +run: {10: ("address" "array" "number")} <- new {num: "type"}, {5: "literal"}
 75 +mem: array length is 5
 76 # skip alloc id in allocation
 77 +mem: storing 1000 in location 11
 78 # don't forget the extra locations for alloc id and array length
 79 +mem: storing 7 in location 30
 80 
 81 :(scenario dilated_reagent_with_new)
 82 def main [
 83   10:&:&:num <- new {(& num): type}
 84 ]
 85 +new: size of '(& num)' is 2
 86 
 87 //: 'new' takes a weird 'type' as its first ingredient; don't error on it
 88 :(before "End Mu Types Initialization")
 89 put(Type_ordinal, "type", 0);
 90 :(code)
 91 bool is_mu_type_literal(const reagent& r) {
 92   return is_literal(r) && r.type && r.type->name == "type";
 93 }
 94 
 95 :(before "End Primitive Recipe Declarations")
 96 NEW,
 97 :(before "End Primitive Recipe Numbers")
 98 put(Recipe_ordinal, "new", NEW);
 99 :(before "End Primitive Recipe Checks")
100 case NEW: {
101   const recipe& caller = get(Recipe, r);
102   if (inst.ingredients.empty() || SIZE(inst.ingredients) > 2) {
103     raise << maybe(caller.name) << "'new' requires one or two ingredients, but got '" << to_original_string(inst) << "'\n" << end();
104     break;
105   }
106   // End NEW Check Special-cases
107   const reagent& type = inst.ingredients.at(0);
108   if (!is_mu_type_literal(type)) {
109     raise << maybe(caller.name) << "first ingredient of 'new' should be a type, but got '" << type.original_string << "'\n" << end();
110     break;
111   }
112   if (SIZE(inst.ingredients) > 1 && !is_mu_number(inst.ingredients.at(1))) {
113     raise << maybe(caller.name) << "second ingredient of 'new' should be a number (array length), but got '" << type.original_string << "'\n" << end();
114     break;
115   }
116   if (inst.products.empty()) {
117     raise << maybe(caller.name) << "result of 'new' should never be ignored\n" << end();
118     break;
119   }
120   if (!product_of_new_is_valid(inst)) {
121     raise << maybe(caller.name) << "product of 'new' has incorrect type: '" << to_original_string(inst) << "'\n" << end();
122     break;
123   }
124   break;
125 }
126 :(code)
127 bool product_of_new_is_valid(const instruction& inst) {
128   reagent/*copy*/ product = inst.products.at(0);
129   // Update NEW product in Check
130   if (!product.type || product.type->atom || product.type->left->value != Address_type_ordinal)
131     return false;
132   drop_from_type(product, "address");
133   if (SIZE(inst.ingredients) > 1) {
134     // array allocation
135     if (!product.type || product.type->atom || product.type->left->value != Array_type_ordinal)
136       return false;
137     drop_from_type(product, "array");
138   }
139   reagent/*local*/ expected_product(new_type_tree(inst.ingredients.at(0).name));
140   return types_strictly_match(product, expected_product);
141 }
142 
143 void drop_from_type(reagent& r, string expected_type) {
144   assert(!r.type->atom);
145   if (r.type->left->name != expected_type) {
146     raise << "can't drop2 " << expected_type << " from '" << to_string(r) << "'\n" << end();
147     return;
148   }
149   // r.type = r.type->right
150   type_tree* tmp = r.type;
151   r.type = tmp->right;
152   tmp->right = NULL;
153   delete tmp;
154   // if (!r.type->right) r.type = r.type->left
155   assert(!r.type->atom);
156   if (r.type->right) return;
157   tmp = r.type;
158   r.type = tmp->left;
159   tmp->left = NULL;
160   delete tmp;
161 }
162 
163 :(scenario new_returns_incorrect_type)
164 % Hide_errors = true;
165 def main [
166   1:bool <- new num:type
167 ]
168 +error: main: product of 'new' has incorrect type: '1:bool <- new num:type'
169 
170 :(scenario new_discerns_singleton_list_from_atom_container)
171 % Hide_errors = true;
172 def main [
173   1:&:num <- new {(num): type}  # should be '{num: type}'
174 ]
175 +error: main: product of 'new' has incorrect type: '1:&:num <- new {(num): type}'
176 
177 :(scenario new_with_type_abbreviation)
178 def main [
179   1:&:num <- new num:type
180 ]
181 $error: 0
182 
183 :(scenario new_with_type_abbreviation_inside_compound)
184 def main [
185   {1: (address address number), raw: ()} <- new {(& num): type}
186 ]
187 $error: 0
188 
189 :(scenario equal_result_of_new_with_null)
190 def main [
191   1:&:num <- new num:type
192   10:bool <- equal 1:&:num, null
193 ]
194 +mem: storing 0 in location 10
195 
196 //: To implement 'new', a Mu transform turns all 'new' instructions into
197 //: 'allocate' instructions that precompute the amount of memory they want to
198 //: allocate.
199 
200 //: Ensure that we never call 'allocate' directly, and that there's no 'new'
201 //: instructions left after the transforms have run.
202 :(before "End Primitive Recipe Checks")
203 case ALLOCATE: {
204   raise << "never call 'allocate' directly'; always use 'new'\n" << end();
205   break;
206 }
207 :(before "End Primitive Recipe Implementations")
208 case NEW: {
209   raise << "no implementation for 'new'; why wasn't it translated to 'allocate'? Please save a copy of your program and send it to Kartik.\n" << end();
210   break;
211 }
212 
213 :(after "Transform.push_back(check_instruction)")  // check_instruction will guard against direct 'allocate' instructions below
214 Transform.push_back(transform_new_to_allocate);  // idempotent
215 
216 :(code)
217 void transform_new_to_allocate(const recipe_ordinal r) {
218   trace(9991, "transform") << "--- convert 'new' to 'allocate' for recipe " << get(Recipe, r).name << end();
219   for (int i = 0;  i < SIZE(get(Recipe, r).steps);  ++i) {
220     instruction& inst = get(Recipe, r).steps.at(i);
221     // Convert 'new' To 'allocate'
222     if (inst.name == "new") {
223       if (inst.ingredients.empty()) return;  // error raised elsewhere
224       inst.operation = ALLOCATE;
225       type_tree* type = new_type_tree(inst.ingredients.at(0).name);
226       inst.ingredients.at(0).set_value(size_of(type));
227       trace(9992, "new") << "size of '" << inst.ingredients.at(0).name << "' is " << inst.ingredients.at(0).value << end();
228       delete type;
229     }
230   }
231 }
232 
233 //: implement 'allocate' based on size
234 
235 :(before "End Globals")
236 extern const int Reserved_for_tests = 1000;
237 int Memory_allocated_until = Reserved_for_tests;
238 int Initial_memory_per_routine = 100000;
239 :(before "End Reset")
240 Memory_allocated_until = Reserved_for_tests;
241 Initial_memory_per_routine = 100000;
242 :(before "End routine Fields")
243 int alloc, alloc_max;
244 :(before "End routine Constructor")
245 alloc = Memory_allocated_until;
246 Memory_allocated_until += Initial_memory_per_routine;
247 alloc_max = Memory_allocated_until;
248 trace("new") << "routine allocated memory from " << alloc << " to " << alloc_max << end();
249 
250 :(before "End Primitive Recipe Declarations")
251 ALLOCATE,
252 :(before "End Primitive Recipe Numbers")
253 put(Recipe_ordinal, "allocate", ALLOCATE);
254 :(before "End Primitive Recipe Implementations")
255 case ALLOCATE: {
256   // compute the space we need
257   int size = ingredients.at(0).at(0);
258   int alloc_id = Next_alloc_id;
259   Next_alloc_id++;
260   if (SIZE(ingredients) > 1) {
261     // array allocation
262     trace("mem") << "array length is " << ingredients.at(1).at(0) << end();
263     size = /*space for length*/1 + size*ingredients.at(1).at(0);
264   }
265   int result = allocate(size);
266   // initialize alloc-id in payload
267   trace("mem") << "storing alloc-id " << alloc_id << " in location " << result << end();
268   put(Memory, result, alloc_id);
269   if (SIZE(current_instruction().ingredients) > 1) {
270     // initialize array length
271     trace("mem") << "storing array length " << ingredients.at(1).at(0) << " in location " << result+/*skip alloc id*/1 << end();
272     put(Memory, result+/*skip alloc id*/1, ingredients.at(1).at(0));
273   }
274   products.resize(1);
275   products.at(0).push_back(alloc_id);
276   products.at(0).push_back(result);
277   break;
278 }
279 :(code)
280 int allocate(int size) {
281   // include space for alloc id
282   ++size;
283   trace("mem") << "allocating size " << size << end();
284 //?   Total_alloc += size;
285 //?   ++Num_alloc;
286   // Allocate Special-cases
287   // compute the region of memory to return
288   // really crappy at the moment
289   ensure_space(size);
290   const int result = Current_routine->alloc;
291   trace("mem") << "new alloc: " << result << end();
292   // initialize allocated space
293   for (int address = result;  address < result+size;  ++address) {
294     trace("mem") << "storing 0 in location " << address << end();
295     put(Memory, address, 0);
296   }
297   Current_routine->alloc += size;
298   // no support yet for reclaiming memory between routines
299   assert(Current_routine->alloc <= Current_routine->alloc_max);
300   return result;
301 }
302 
303 //: statistics for debugging
304 //? :(before "End Globals")
305 //? int Total_alloc = 0;
306 //? int Num_alloc = 0;
307 //? int Total_free = 0;
308 //? int Num_free = 0;
309 //? :(before "End Reset")
310 //? if (!Memory.empty()) {
311 //?   cerr << Total_alloc << "/" << Num_alloc
312 //?        << " vs " << Total_free << "/" << Num_free << '\n';
313 //?   cerr << SIZE(Memory) << '\n';
314 //? }
315 //? Total_alloc = Num_alloc = Total_free = Num_free = 0;
316 
317 :(code)
318 void ensure_space(int size) {
319   if (size > Initial_memory_per_routine) {
320     cerr << "can't allocate " << size << " locations, that's too much compared to " << Initial_memory_per_routine << ".\n";
321     exit(1);
322   }
323   if (Current_routine->alloc + size > Current_routine->alloc_max) {
324     // waste the remaining space and create a new chunk
325     Current_routine->alloc = Memory_allocated_until;
326     Memory_allocated_until += Initial_memory_per_routine;
327     Current_routine->alloc_max = Memory_allocated_until;
328     trace("new") << "routine allocated memory from " << Current_routine->alloc << " to " << Current_routine->alloc_max << end();
329   }
330 }
331 
332 :(scenario new_initializes)
333 % Memory_allocated_until = 10;
334 % put(Memory, Memory_allocated_until, 1);
335 def main [
336   1:&:num <- new num:type
337 ]
338 +mem: storing 0 in location 10
339 +mem: storing 0 in location 11
340 +mem: storing 10 in location 2
341 
342 :(scenario new_initializes_alloc_id)
343 % Memory_allocated_until = 10;
344 % put(Memory, Memory_allocated_until, 1);
345 % Next_alloc_id = 23;
346 def main [
347   1:&:num <- new num:type
348 ]
349 # initialize memory
350 +mem: storing 0 in location 10
351 +mem: storing 0 in location 11
352 # alloc-id in payload
353 +mem: storing alloc-id 23 in location 10
354 # alloc-id in address
355 +mem: storing 23 in location 1
356 
357 :(scenario new_size)
358 def main [
359   10:&:num <- new num:type
360   12:&:num <- new num:type
361   20:num/alloc1, 21:num/alloc2 <- deaddress 10:&:num, 12:&:num
362   30:num <- subtract 21:num/alloc2, 20:num/alloc1
363 ]
364 # size of number + alloc id
365 +mem: storing 2 in location 30
366 
367 :(scenario new_array_size)
368 def main [
369   10:&:@:num <- new num:type, 5
370   12:&:num <- new num:type
371   20:num/alloc1, 21:num/alloc2 <- deaddress 10:&:num, 12:&:num
372   30:num <- subtract 21:num/alloc2, 20:num/alloc1
373 ]
374 # 5 locations for array contents + array length + alloc id
375 +mem: storing 7 in location 30
376 
377 :(scenario new_empty_array)
378 def main [
379   10:&:@:num <- new num:type, 0
380   12:&:num <- new num:type
381   20:num/alloc1, 21:num/alloc2 <- deaddress 10:&:@:num, 12:&:num
382   30:num <- subtract 21:num/alloc2, 20:num/alloc1
383 ]
384 +run: {10: ("address" "array" "number")} <- new {num: "type"}, {0: "literal"}
385 +mem: array length is 0
386 # one location for array length
387 +mem: storing 2 in location 30
388 
389 //: If a routine runs out of its initial allocation, it should allocate more.
390 :(scenario new_overflow)
391 % Initial_memory_per_routine = 3;  // barely enough room for point allocation below
392 def main [
393   10:&:num <- new num:type
394   12:&:point <- new point:type  # not enough room in initial page
395 ]
396 +new: routine allocated memory from 1000 to 1003
397 +new: routine allocated memory from 1003 to 1006
398 
399 :(scenario new_without_ingredient)
400 % Hide_errors = true;
401 def main [
402   1:&:num <- new  # missing ingredient
403 ]
404 +error: main: 'new' requires one or two ingredients, but got '1:&:num <- new'
405 
406 //: a little helper: convert address to number
407 
408 :(before "End Primitive Recipe Declarations")
409 DEADDRESS,
410 :(before "End Primitive Recipe Numbers")
411 put(Recipe_ordinal, "deaddress", DEADDRESS);
412 :(before "End Primitive Recipe Checks")
413 case DEADDRESS: {
414   // primary goal of these checks is to forbid address arithmetic
415   for (int i = 0;  i < SIZE(inst.ingredients);  ++i) {
416     if (!is_mu_address(inst.ingredients.at(i))) {
417       raise << maybe(get(Recipe, r).name) << "'deaddress' requires address ingredients, but got '" << inst.ingredients.at(i).original_string << "'\n" << end();
418       goto finish_checking_instruction;
419     }
420   }
421   if (SIZE(inst.products) > SIZE(inst.ingredients)) {
422     raise << maybe(get(Recipe, r).name) << "too many products in '" << to_original_string(inst) << "'\n" << end();
423     break;
424   }
425   for (int i = 0;  i < SIZE(inst.products);  ++i) {
426     if (!is_real_mu_number(inst.products.at(i))) {
427       raise << maybe(get(Recipe, r).name) << "'deaddress' requires number products, but got '" << inst.products.at(i).original_string << "'\n" << end();
428       goto finish_checking_instruction;
429     }
430   }
431   break;
432 }
433 :(before "End Primitive Recipe Implementations")
434 case DEADDRESS: {
435   products.resize(SIZE(ingredients));
436   for (int i = 0;  i < SIZE(ingredients);  ++i) {
437     products.at(i).push_back(ingredients.at(i).at(/*skip alloc id*/1));
438   }
439   break;
440 }