4266 - space for alloc-id in heap allocations

This has taken me almost 6 weeks :(
author: Kartik Agaram <vc@akkartik.com> 2018-06-24 09:16:17 -0700
committer: Kartik Agaram <vc@akkartik.com> 2018-06-24 09:18:20 -0700
commit: 23d3a02226973f80188e84fa5dcedb14413c5b7f (patch)
tree: 3c73284cb795e74d78e53b72df470cafca4c70cf /034address.cc
parent: 377b00b045289a3fa8e88d4b2f129d797c687e2f (diff)
download: mu-23d3a02226973f80188e84fa5dcedb14413c5b7f.tar.gz
1 files changed, 122 insertions, 38 deletions
diff --git a/034address.cc b/034address.cc
index b97dfb17..6c4bdda4 100644
--- a/034address.cc
+++ b/034address.cc
@@ -18,6 +18,37 @@
 //: write to the payload of an ingredient rather than its value, simply add
 //: the /lookup property to it. Modern computers provide efficient support for
 //: addresses and lookups, making this a realistic feature.
+//:
+//: To create addresses and allocate memory exclusively for their use, use
+//: 'new'. Memory is a finite resource so if the computer can't satisfy your
+//: request, 'new' may return a 0 (null) address.
+//:
+//: Computers these days have lots of memory so in practice we can often
+//: assume we'll never run out. If you start running out however, say in a
+//: long-running program, you'll need to switch mental gears and start
+//: husbanding our memory more carefully. The most important tool to avoid
+//: wasting memory is to 'abandon' an address when you don't need it anymore.
+//: That frees up the memory allocated to it to be reused in future calls to
+//: 'new'.
+
+//: Since memory can be reused multiple times, it can happen that you have a
+//: stale copy to an address that has since been abandoned and reused. Using
+//: the stale address is almost never safe, but it can be very hard to track
+//: down such copies because any errors caused by them may occur even millions
+//: of instructions after the copy or abandon instruction. To help track down
+//: such issues, Mu tracks an 'alloc id' for each allocation it makes. The
+//: first call to 'new' has an alloc id of 1, the second gets 2, and so on.
+//: The alloc id is never reused.
+:(before "End Globals")
+long long Next_alloc_id = 0;
+:(before "End Reset")
+Next_alloc_id = 0;
+
+//: The 'new' instruction records alloc ids both in the memory being allocated
+//: and *also* in the address. The 'abandon' instruction clears alloc ids in
+//: both places as well. Tracking alloc ids in this manner allows us to raise
+//: errors about stale addresses much earlier: 'lookup' operations always
+//: compare alloc ids between the address and its payload.
 
 //: todo: give 'new' a custodian ingredient. Following malloc/free is a temporary hack.
 
@@ -25,29 +56,33 @@
 # call 'new' two times with identical types without modifying the results; you
 # should get back different results
 def main [
-  1:&:num/raw <- new num:type
-  2:&:num/raw <- new num:type
-  3:bool/raw <- equal 1:&:num/raw, 2:&:num/raw
+  10:&:num <- new num:type
+  12:&:num <- new num:type
+  20:bool <- equal 10:&:num, 12:&:num
 ]
-+mem: storing 0 in location 3
++mem: storing 1000 in location 11
++mem: storing 0 in location 20
 
 :(scenario new_array)
 # call 'new' with a second ingredient to allocate an array of some type rather than a single copy
 def main [
-  1:&:@:num/raw <- new num:type, 5
-  2:&:num/raw <- new num:type
-  3:num/raw <- subtract 2:&:num/raw, 1:&:@:num/raw
+  10:&:@:num <- new num:type, 5
+  12:&:num <- new num:type
+  20:num/alloc2, 21:num/alloc1 <- deaddress 10:&:@:num, 12:&:num
+  30:num <- subtract 21:num/alloc2, 20:num/alloc1
 ]
-+run: {1: ("address" "array" "number"), "raw": ()} <- new {num: "type"}, {5: "literal"}
++run: {10: ("address" "array" "number")} <- new {num: "type"}, {5: "literal"}
 +mem: array length is 5
-# don't forget the extra location for array length
-+mem: storing 6 in location 3
+# skip alloc id in allocation
++mem: storing 1000 in location 11
+# don't forget the extra locations for alloc id and array length
++mem: storing 7 in location 30
 
 :(scenario dilated_reagent_with_new)
 def main [
-  1:&:&:num <- new {(& num): type}
+  10:&:&:num <- new {(& num): type}
 ]
-+new: size of '(& num)' is 1
++new: size of '(& num)' is 2
 
 //: 'new' takes a weird 'type' as its first ingredient; don't error on it
 :(before "End Mu Types Initialization")
@@ -135,22 +170,29 @@ def main [
 :(scenario new_discerns_singleton_list_from_atom_container)
 % Hide_errors = true;
 def main [
-  1:&:num/raw <- new {(num): type}  # should be '{num: type}'
+  1:&:num <- new {(num): type}  # should be '{num: type}'
 ]
-+error: main: product of 'new' has incorrect type: '1:&:num/raw <- new {(num): type}'
++error: main: product of 'new' has incorrect type: '1:&:num <- new {(num): type}'
 
 :(scenario new_with_type_abbreviation)
 def main [
-  1:&:num/raw <- new num:type
+  1:&:num <- new num:type
 ]
 $error: 0
 
 :(scenario new_with_type_abbreviation_inside_compound)
 def main [
-  {1: (& & num), raw: ()} <- new {(& num): type}
+  {1: (address address number), raw: ()} <- new {(& num): type}
 ]
 $error: 0
 
+:(scenario equal_result_of_new_with_null)
+def main [
+  1:&:num <- new num:type
+  10:bool <- equal 1:&:num, null
+]
++mem: storing 0 in location 10
+
 //: To implement 'new', a Mu transform turns all 'new' instructions into
 //: 'allocate' instructions that precompute the amount of memory they want to
 //: allocate.
@@ -221,15 +263,18 @@ case ALLOCATE: {
   int result = allocate(size);
   if (SIZE(current_instruction().ingredients) > 1) {
     // initialize array length
-    trace("mem") << "storing " << ingredients.at(1).at(0) << " in location " << result << end();
-    put(Memory, result, ingredients.at(1).at(0));
+    trace("mem") << "storing array length " << ingredients.at(1).at(0) << " in location " << result+/*skip alloc id*/1 << end();
+    put(Memory, result+/*skip alloc id*/1, ingredients.at(1).at(0));
   }
   products.resize(1);
+  products.at(0).push_back(/*alloc id*/0);
   products.at(0).push_back(result);
   break;
 }
 :(code)
 int allocate(int size) {
+  // include space for alloc id
+  ++size;
   trace("mem") << "allocating size " << size << end();
 //?   Total_alloc += size;
 //?   ++Num_alloc;
@@ -289,42 +334,45 @@ def main [
 
 :(scenario new_size)
 def main [
-  11:&:num/raw <- new num:type
-  12:&:num/raw <- new num:type
-  13:num/raw <- subtract 12:&:num/raw, 11:&:num/raw
+  10:&:num <- new num:type
+  12:&:num <- new num:type
+  20:num/alloc1, 21:num/alloc2 <- deaddress 10:&:num, 12:&:num
+  30:num <- subtract 21:num/alloc2, 20:num/alloc1
 ]
-# size of number
-+mem: storing 1 in location 13
+# size of number + alloc id
++mem: storing 2 in location 30
 
 :(scenario new_array_size)
 def main [
-  1:&:@:num/raw <- new num:type, 5
-  2:&:num/raw <- new num:type
-  3:num/raw <- subtract 2:&:num/raw, 1:&:@:num/raw
+  10:&:@:num <- new num:type, 5
+  12:&:num <- new num:type
+  20:num/alloc1, 21:num/alloc2 <- deaddress 10:&:num, 12:&:num
+  30:num <- subtract 21:num/alloc2, 20:num/alloc1
 ]
-# 5 locations for array contents + array length
-+mem: storing 6 in location 3
+# 5 locations for array contents + array length + alloc id
++mem: storing 7 in location 30
 
 :(scenario new_empty_array)
 def main [
-  1:&:@:num/raw <- new num:type, 0
-  2:&:num/raw <- new num:type
-  3:num/raw <- subtract 2:&:num/raw, 1:&:@:num/raw
+  10:&:@:num <- new num:type, 0
+  12:&:num <- new num:type
+  20:num/alloc1, 21:num/alloc2 <- deaddress 10:&:@:num, 12:&:num
+  30:num <- subtract 21:num/alloc2, 20:num/alloc1
 ]
-+run: {1: ("address" "array" "number"), "raw": ()} <- new {num: "type"}, {0: "literal"}
++run: {10: ("address" "array" "number")} <- new {num: "type"}, {0: "literal"}
 +mem: array length is 0
 # one location for array length
-+mem: storing 1 in location 3
++mem: storing 2 in location 30
 
 //: If a routine runs out of its initial allocation, it should allocate more.
 :(scenario new_overflow)
-% Initial_memory_per_routine = 2;  // barely enough room for point allocation below
+% Initial_memory_per_routine = 3;  // barely enough room for point allocation below
 def main [
-  1:&:num/raw <- new num:type
-  2:&:point/raw <- new point:type  # not enough room in initial page
+  10:&:num <- new num:type
+  12:&:point <- new point:type  # not enough room in initial page
 ]
-+new: routine allocated memory from 1000 to 1002
-+new: routine allocated memory from 1002 to 1004
++new: routine allocated memory from 1000 to 1003
++new: routine allocated memory from 1003 to 1006
 
 :(scenario new_without_ingredient)
 % Hide_errors = true;
@@ -332,3 +380,39 @@ def main [
   1:&:num <- new  # missing ingredient
 ]
 +error: main: 'new' requires one or two ingredients, but got '1:&:num <- new'
+
+//: a little helper: convert address to number
+
+:(before "End Primitive Recipe Declarations")
+DEADDRESS,
+:(before "End Primitive Recipe Numbers")
+put(Recipe_ordinal, "deaddress", DEADDRESS);
+:(before "End Primitive Recipe Checks")
+case DEADDRESS: {
+  // primary goal of these checks is to forbid address arithmetic
+  for (int i = 0;  i < SIZE(inst.ingredients);  ++i) {
+    if (!is_mu_address(inst.ingredients.at(i))) {
+      raise << maybe(get(Recipe, r).name) << "'deaddress' requires address ingredients, but got '" << inst.ingredients.at(i).original_string << "'\n" << end();
+      goto finish_checking_instruction;
+    }
+  }
+  if (SIZE(inst.products) > SIZE(inst.ingredients)) {
+    raise << maybe(get(Recipe, r).name) << "too many products in '" << to_original_string(inst) << "'\n" << end();
+    break;
+  }
+  for (int i = 0;  i < SIZE(inst.products);  ++i) {
+    if (!is_real_mu_number(inst.products.at(i))) {
+      raise << maybe(get(Recipe, r).name) << "'deaddress' requires number products, but got '" << inst.products.at(i).original_string << "'\n" << end();
+      goto finish_checking_instruction;
+    }
+  }
+  break;
+}
+:(before "End Primitive Recipe Implementations")
+case DEADDRESS: {
+  products.resize(SIZE(ingredients));
+  for (int i = 0;  i < SIZE(ingredients);  ++i) {
+    products.at(i).push_back(ingredients.at(i).at(/*skip alloc id*/1));
+  }
+  break;
+}
author	Kartik Agaram <vc@akkartik.com>	2018-06-24 09:16:17 -0700
committer	Kartik Agaram <vc@akkartik.com>	2018-06-24 09:18:20 -0700
commit	23d3a02226973f80188e84fa5dcedb14413c5b7f (patch)
tree	3c73284cb795e74d78e53b72df470cafca4c70cf /034address.cc
parent	377b00b045289a3fa8e88d4b2f129d797c687e2f (diff)
download	mu-23d3a02226973f80188e84fa5dcedb14413c5b7f.tar.gz