3324 - completely redo type abbreviations

The old approach with '&' and '@' modifiers turned out to be a bad idea
because it introduces notions of precedence. Worse, it turns out you want
different precedence rules at different times, as the old test alluded:

  x:@number:3        # we want this to mean (array number 3)
  x:address:@number  # we want this to mean (address array number)

Instead we'll give up and focus on a single extensible mechanism that
allows us to say this instead:

  x:@:number:3
  x:address:@:number

In addition it allows us to shorten other types as well:

  x:&:@:num
  type board = &:@:&:@:char  # for tic-tac-toe

Hmm, that last example reminds me that we don't handle abbreviations
inside type abbreviation definitions so far.
author: Kartik K. Agaram <vc@akkartik.com> 2016-09-11 17:14:48 -0700
committer: Kartik K. Agaram <vc@akkartik.com> 2016-09-11 17:50:36 -0700
commit: cdf0f349d1ad432d785cf69c7a136fff07258adf (patch)
tree: fe88b7b2039e9d50ce5b37cc115315a12f22d797
parent: 68578a7828ce8300fa10b28b5f57e56723303e93 (diff)
download: mu-cdf0f349d1ad432d785cf69c7a136fff07258adf.tar.gz
5 files changed, 157 insertions, 81 deletions
diff --git a/017parse_tree.cc b/017parse_tree.cc
index 8efcb83e..83a96d6b 100644
--- a/017parse_tree.cc
+++ b/017parse_tree.cc
@@ -7,6 +7,9 @@
 // the first element of a type tree is always an atom, and left and right
 // pointers of non-atoms are never NULL. All type trees are 'dotted' in lisp
 // parlance.
+//
+// For now you can't use the simpler 'colon-based' representation inside type
+// trees. Once you start typing parens, keep on typing parens.
 
 :(scenarios load)
 :(scenario dilated_reagent_with_nested_brackets)
@@ -23,6 +26,7 @@ type_names = parse_string_tree(type_names);
 :(code)
 string_tree* parse_string_tree(string_tree* s) {
   assert(s->atom);
+  assert(!s->value.empty());
   if (s->value.at(0) != '(') return s;
   string_tree* result = parse_string_tree(s->value);
   delete s;
diff --git a/018type_abbreviations.cc b/018type_abbreviations.cc
index a13d97a8..9e907ae9 100644
--- a/018type_abbreviations.cc
+++ b/018type_abbreviations.cc
@@ -1,90 +1,163 @@
 //: For convenience, make some common types shorter.
-//:
-//:   a) Rewrite '&t' to 'address:t' and '@t' to 'array:type' (with the
-//:   ability to chain any combination of the two). This is not extensible.
-//:
-//:   b) Provide a facility to create new type names out of old ones.
 
-//:: a) expanding '&' and '@'
-
-:(scenarios load)
-:(scenario abbreviations_for_address_and_array)
-def main [
-  f 1:&number  # abbreviation for 'address:number'
-  f 2:@number  # abbreviation for 'array:number'
-  f 3:&@number  # combining '&' and '@'
-  f 4:&&@&@number  # ..any number of times
-  f 5:array:&number:3  # abbreviations take precedence over ':'
-  f {6: (array &number 3)}  # support for dilated reagents and more complex parse trees
-  f 7:@number:3  # *not* the same as array:number:3
-]
-+parse:   ingredient: {1: ("address" "number")}
-+parse:   ingredient: {2: ("array" "number")}
-+parse:   ingredient: {3: ("address" "array" "number")}
-+parse:   ingredient: {4: ("address" "address" "array" "address" "array" "number")}
-+parse:   ingredient: {5: ("array" ("address" "number") "3")}
-+parse:   ingredient: {6: ("array" ("address" "number") "3")}
-# not what you want
-+parse:   ingredient: {7: (("array" "number") "3")}
-
-:(scenario abbreviation_error)
-% Hide_errors = true;
+:(scenario type_abbreviations)
+type foo = number
 def main [
-  f 1:&&@&  # abbreviations without payload
+  a:foo <- copy 34
 ]
-+error: invalid type abbreviation &&@&
++run: {a: "number"} <- copy {34: "literal"}
 
-:(before "End Parsing Reagent Type Property(type_names)")
-string_tree* new_type_names = replace_address_and_array_symbols(type_names);
-delete type_names;
-type_names = new_type_names;
-:(before "End Parsing Dilated Reagent Type Property(type_names)")
-string_tree* new_type_names = replace_address_and_array_symbols(type_names);
-delete type_names;
-type_names = new_type_names;
+//:: Allow type abbreviations to be defined in mu code.
+//: For now you can't use abbreviations inside abbreviations.
 
+:(before "End Globals")
+map<string, type_tree*> Type_abbreviations, Type_abbreviations_snapshot;
+:(before "End save_snapshots")
+Type_abbreviations_snapshot = Type_abbreviations;
+:(before "End restore_snapshots")
+restore_type_abbreviations();
 :(code)
-// simple version; lots of unnecessary allocations; always creates a new pointer
-string_tree* replace_address_and_array_symbols(string_tree* orig) {
-  if (orig == NULL) return NULL;
-  if (orig->atom)
-    return replace_address_and_array_symbols(orig->value);
-  return new string_tree(replace_address_and_array_symbols(orig->left),
-                         replace_address_and_array_symbols(orig->right));
+void restore_type_abbreviations() {  // roll Type_abbreviations back to Type_abbreviations_snapshot
+  for (map<string, type_tree*>::iterator p = Type_abbreviations.begin(); p != Type_abbreviations.end(); ++p) {
+    if (!contains_key(Type_abbreviations_snapshot, p->first))  // key is absent from the snapshot
+      delete p->second;  // only trees under keys the snapshot lacks are freed; the rest stay alive for the snapshot
+  }
+  Type_abbreviations.clear();
+  Type_abbreviations = Type_abbreviations_snapshot;  // copies the map's pointers, not the trees they point to
 }
 
-// todo: unicode
-string_tree* replace_address_and_array_symbols(const string& type_name) {
-  if (type_name.empty()) return NULL;
-  if (type_name.at(0) != '&' && type_name.at(0) != '@')
-    return new string_tree(type_name);
-  string_tree* result = NULL;
-  string_tree* curr = NULL;
-  int i = 0;
-  while (i < SIZE(type_name)) {
-    string_tree* new_node = NULL;
-    if (type_name.at(i) == '&')
-      new_node = new string_tree("address");
-    else if (type_name.at(i) == '@')
-      new_node = new string_tree("array");
-    else
-      break;
-    if (result == NULL)
-      result = curr = new string_tree(new_node, NULL);
-    else {
-      curr->right = new string_tree(new_node, NULL);
-      curr = curr->right;
-    }
-    ++i;
+:(before "End Command Handlers")
+else if (command == "type") {
+  load_type_abbreviations(in);
+}
+
+:(code)
+void load_type_abbreviations(istream& in) {  // reads '<new type name> = <type expression>' from 'in' and records it in Type_abbreviations; raises an error and returns early on malformed input
+  string new_type_name = next_word(in);
+  assert(has_data(in) || !new_type_name.empty());
+  if (!has_data(in) || new_type_name.empty()) {
+    raise << "incomplete 'type' statement; must be of the form 'type <new type name> = <type expression>'\n" << end();
+    return;
+  }
+  string arrow = next_word(in);  // the word after the name; must be '=' (checked below)
+  assert(has_data(in) || !arrow.empty());
+  if (arrow.empty()) {
+    raise << "incomplete 'type' statement 'type " << new_type_name << "'\n" << end();
+    return;
+  }
+  if (arrow != "=") {
+    raise << "'type' statements must be of the form 'type <new type name> = <type expression>' but got 'type " << new_type_name << ' ' << arrow << "'\n" << end();
+    return;
+  }
+  if (!has_data(in)) {
+    raise << "incomplete 'type' statement 'type " << new_type_name << " ='\n" << end();
+    return;
+  }
+  string old = next_word(in);  // the type expression being abbreviated; read as a single word
+  if (old.empty()) {
+    raise << "incomplete 'type' statement 'type " << new_type_name << " ='\n" << end();
+    raise << "'type' statements must be of the form 'type <new type name> = <type expression>' but got 'type " << new_type_name << ' ' << arrow << "'\n" << end();  // NOTE(review): arrow is "=" here, so this second error looks redundant with the one above -- confirm it is intended
+    return;
+  }
+  if (contains_key(Type_abbreviations, new_type_name)) {  // redefining a name is an error; the existing entry is left as is
+    raise << "'type' conflict: '" << new_type_name << "' defined as both '" << names_to_string_without_quotes(get(Type_abbreviations, new_type_name)) << "' and '" << old << "'\n" << end();
+    return;
+  }
+  trace(9990, "type") << "alias " << new_type_name << " = " << old << end();
+  type_tree* old_type = new_type_tree(old);
+  put(Type_abbreviations, new_type_name, old_type);  // the map keeps this pointer; restore_type_abbreviations() deletes entries whose key is not in the snapshot
+}
+
+type_tree* new_type_tree(const string& x) {  // builds a type_tree from the textual type expression 'x'; parse_string_tree() asserts the text is non-empty
+  string_tree* type_names = new string_tree(x);
+  type_names = parse_string_tree(type_names);  // returns its argument unchanged unless the text starts with '('
+  type_tree* result = new_type_tree(type_names);
+  delete type_names;  // the intermediate string tree is freed before returning
   return result;
 }
 
-//:: b) extensible type abbreviations
+:(scenario type_error1)
+% Hide_errors = true;
+type foo
++error: incomplete 'type' statement 'type foo'
 
-:(before "End Globals")
-map<string, type_tree*> Type_abbreviations;
+:(scenario type_error2)
+% Hide_errors = true;
+type foo =
++error: incomplete 'type' statement 'type foo ='
+
+:(scenario type_error3)
+% Hide_errors = true;
+type foo bar baz
++error: 'type' statements must be of the form 'type <new type name> = <type expression>' but got 'type foo bar'
+
+:(scenario type_conflict_error)
+% Hide_errors = true;
+type foo = bar
+type foo = baz
++error: 'type' conflict: 'foo' defined as both 'bar' and 'baz'
+
+//:: A few default abbreviations.
+
+:(before "End Mu Types Initialization")
+put(Type_abbreviations, "&", new type_tree("address"));
+put(Type_abbreviations, "@", new type_tree("array"));
+put(Type_abbreviations, "num", new type_tree("number"));
+put(Type_abbreviations, "bool", new type_tree("boolean"));
+put(Type_abbreviations, "char", new type_tree("character"));
+
+//:: Expand type aliases before running.
+//: We'll do this in a transform so that we don't need to define abbreviations
+//: before we use them.
+
+:(scenarios transform)
+:(scenario abbreviations_for_address_and_array)
+def main [
+  f 1:&:number  # abbreviation for 'address:number'
+  f 2:@:number  # abbreviation for 'array:number'
+  f 3:&:@:number  # combining '&' and '@'
+  f 4:&:&:@:&:@:number  # ..any number of times
+  f {5: (array (& number) 3)}  # support for dilated reagents and more complex parse trees
+]
+def f [
+]
++transform: --- expand type abbreviations in recipe 'main'
++transform: ingredient type after expanding abbreviations: ("address" "number")
++transform: ingredient type after expanding abbreviations: ("array" "number")
++transform: ingredient type after expanding abbreviations: ("address" "array" "number")
++transform: ingredient type after expanding abbreviations: ("address" "address" "array" "address" "array" "number")
++transform: ingredient type after expanding abbreviations: ("array" ("address" "number") "3")
+
+:(before "Transform.push_back(update_instruction_operations)")
+// Begin Type Modifying Transforms
+Transform.push_back(expand_type_abbreviations);  // idempotent
+// End Type Modifying Transforms
+
+:(code)
+void expand_type_abbreviations(const recipe_ordinal r) {  // transform: expands abbreviated type names in every ingredient and product of each instruction in recipe 'r', tracing the result
+  const recipe& caller = get(Recipe, r);
+  trace(9991, "transform") << "--- expand type abbreviations in recipe '" << caller.name << "'" << end();
+  for (int i = 0; i < SIZE(caller.steps); ++i) {
+    const instruction& inst = caller.steps.at(i);
+    trace(9991, "transform") << "instruction '" << inst.original_string << "'" << end();  // close the quote opened before the instruction text
+    for (int j = 0; j < SIZE(inst.ingredients); ++j) {  // 'j' so the step index 'i' is not shadowed
+      expand_type_abbreviations(inst.ingredients.at(j).type);  // rewrites the type tree in place
+      trace(9992, "transform") << "ingredient type after expanding abbreviations: " << names_to_string(inst.ingredients.at(j).type) << end();
+    }
+    for (int j = 0; j < SIZE(inst.products); ++j) {
+      expand_type_abbreviations(inst.products.at(j).type);
+      trace(9992, "transform") << "product type after expanding abbreviations: " << names_to_string(inst.products.at(j).type) << end();
+    }
+  }
+}
+
+void expand_type_abbreviations(type_tree* type) {  // expands abbreviated atoms anywhere inside 'type', modifying the tree in place; NULL is a no-op
+  if (!type) return;
+  if (!type->atom) {
+    expand_type_abbreviations(type->left);  // non-atom: recurse into both subtrees
+    expand_type_abbreviations(type->right);
+    return;
+  }
+  if (contains_key(Type_abbreviations, type->name))
+    *type = type_tree(*get(Type_abbreviations, type->name));  // overwrite the atom with a copy of its expansion; the copy is not scanned again for further abbreviations
+}
diff --git a/030container.cc b/030container.cc
index f3b50e19..1c7df774 100644
--- a/030container.cc
+++ b/030container.cc
@@ -174,7 +174,7 @@ const type_tree* root_type(const type_tree* t) {
 //: precompute Container_metadata before we need size_of
 //: also store a copy in each reagent in each instruction in each recipe
 
-:(after "Begin Instruction Modifying Transforms")  // needs to happen before transform_names, therefore after Type Modifying Transforms below
+:(after "End Type Modifying Transforms")
 Transform.push_back(compute_container_sizes);
 :(code)
 void compute_container_sizes(recipe_ordinal r) {
@@ -824,10 +824,8 @@ container bar [
 ]
 $error: 0
 
-:(after "Begin Instruction Modifying Transforms")
-// Begin Type Modifying Transforms
+:(before "End Type Modifying Transforms")
 Transform.push_back(check_or_set_invalid_types);  // idempotent
-// End Type Modifying Transforms
 
 :(code)
 void check_or_set_invalid_types(const recipe_ordinal r) {
diff --git a/053recipe_header.cc b/053recipe_header.cc
index 3f3c5403..d385d41c 100644
--- a/053recipe_header.cc
+++ b/053recipe_header.cc
@@ -221,8 +221,8 @@ def foo x:number -> y:number [
 Transform.push_back(check_calls_against_header);  // idempotent
 :(code)
 void check_calls_against_header(const recipe_ordinal r) {
-  trace(9991, "transform") << "--- type-check calls inside recipe " << get(Recipe, r).name << end();
   const recipe& caller = get(Recipe, r);
+  trace(9991, "transform") << "--- type-check calls inside recipe " << caller.name << end();
   for (int i = 0; i < SIZE(caller.steps); ++i) {
     const instruction& inst = caller.steps.at(i);
     if (inst.operation < MAX_PRIMITIVE_RECIPES) continue;
diff --git a/061text.mu b/061text.mu
index 6d0fb082..4ef96629 100644
--- a/061text.mu
+++ b/061text.mu
@@ -1,4 +1,5 @@
 # Some useful helpers for dealing with text (arrays of characters)
+type text = address:array:character
 
 # to-text-line gets called implicitly in various places
 # define it to be identical to 'to-text' by default
author	Kartik K. Agaram <vc@akkartik.com>	2016-09-11 17:14:48 -0700
committer	Kartik K. Agaram <vc@akkartik.com>	2016-09-11 17:50:36 -0700
commit	cdf0f349d1ad432d785cf69c7a136fff07258adf (patch)
tree	fe88b7b2039e9d50ce5b37cc115315a12f22d797
parent	68578a7828ce8300fa10b28b5f57e56723303e93 (diff)
download	mu-cdf0f349d1ad432d785cf69c7a136fff07258adf.tar.gz