4548: start of a compiler for a new experimental low-level language

author: Kartik Agaram <vc@akkartik.com> 2018-09-17 22:57:10 -0700
committer: Kartik Agaram <vc@akkartik.com> 2018-09-17 22:57:58 -0700
commit: f09280141f18fbe8cef0ed576cf932e12e315666 (patch)
tree: d00962b07cb013f89d4fdb2fcf19c392afb62b5c /transect/011load.cc
parent: 0a7b03727a736f73c16d37b22afef8496c60d657 (diff)
download: mu-f09280141f18fbe8cef0ed576cf932e12e315666.tar.gz
1 files changed, 228 insertions, 0 deletions
diff --git a/transect/011load.cc b/transect/011load.cc
new file mode 100644
index 00000000..f8cf96e8
--- /dev/null
+++ b/transect/011load.cc
@@ -0,0 +1,228 @@
+//: Phase 1 of translating Mu code: load it from a textual representation.
+//:
+//: The process of translating Mu code:
+//:   load -> check types -> convert
+
+:(scenarios load)  // use 'load' instead of 'run' in all scenarios in this layer
+:(scenario single_function)
+fn foo [
+  1 : int <- copy 23
+]
++parse: function: foo
++parse:   0 in operands
++parse:   0 in_out operands
++parse: instruction: copy
++parse:   in => 23 : literal
++parse:   in_out => 1 : int
+
+:(code)
+// Convenience overload: load Mu code held entirely in a string by wrapping it
+// in a stream and delegating to the stream-based loader.
+void load(string form) {
+  istringstream form_stream(form);
+  load(form_stream);
+}
+
+// Read Mu source from 'in' one line at a time and hand each top-level
+// declaration to the loader for its leading keyword.
+// Empty lines, whitespace-only lines and lines whose first non-whitespace
+// character is '#' are skipped. The loader receives both the rest of the
+// current line and the underlying stream.
+void load(istream& in) {
+  while (has_data(in)) {
+    string curr_line;
+    getline(in, curr_line);
+    if (curr_line.empty()) continue;  // maybe eof
+    const char lead = first_non_whitespace(curr_line);
+    if (lead == '\0' || lead == '#') continue;  // only whitespace, or only comment
+    trace(99, "parse") << "line: " << curr_line << end();
+    istringstream rest_of_line(curr_line);
+    while (has_data(rest_of_line)) {
+      string keyword;
+      rest_of_line >> keyword;
+      if (keyword.empty()) continue;  // maybe eof
+      if (keyword[0] == '#') break;  // comment; ignore rest of line
+      // dispatch on the declaration keyword
+      if (keyword == "fn") {
+        load_function(rest_of_line, in);
+      }
+      else if (keyword == "var") {
+        load_global(rest_of_line, in);
+      }
+      else if (keyword == "choice") {
+        load_choice(rest_of_line, in);
+      }
+      else if (keyword == "record") {
+        load_record(rest_of_line, in);
+      }
+      else {
+        raise << "unrecognized top-level keyword '" << keyword << "'; should be one of 'record', 'choice', 'var' or 'fn'\n" << end();
+      }
+      // only the first word of a line is a keyword; the loader handles the rest
+      break;
+    }
+    // nothing here, because we'll be at the next top-level declaration
+  }
+}
+
+// Loader for a 'record' declaration. Currently an empty placeholder: it reads
+// nothing from either stream.
+void load_record(istream& first_line, istream& in) {
+  // not implemented yet
+}
+
+// Loader for a 'choice' declaration. Currently an empty placeholder: it reads
+// nothing from either stream.
+void load_choice(istream& first_line, istream& in) {
+  // not implemented yet
+}
+
+// Loader for a 'var' (global) declaration. Currently an empty placeholder: it
+// reads nothing from either stream.
+void load_global(istream& first_line, istream& in) {
+  // not implemented yet
+}
+
+// Parse one function definition and record it via new_function().
+//
+// 'first_line' holds the rest of the header line after the 'fn' keyword:
+//   name [in-name : type]... [-> [in_out-name : type]...] [
+// 'in' supplies the following lines. Instruction lines are consumed from it up
+// to and including the line whose first non-whitespace character is ']'.
+// Blank lines and lines starting with '#' inside the body are skipped, like at
+// the top level.
+//
+// Each instruction line has the form:
+//   [in_out-operand [: type]]... <- name [in-operand [: type]]...
+// where everything up to and including '<-' is optional. An 'in' operand
+// without an explicit type that looks like an integer gets the literal type.
+//
+// Malformed input trips an assert. Emits "parse" trace lines for the function
+// header and for each instruction.
+void load_function(istream& first_line, istream& in) {
+  string name;
+  assert(has_data(first_line));
+  first_line >> name;
+  trace(99, "parse") << "function: " << name << end();
+  function_info& curr = new_function(name);
+  string tmp;
+  // read in parameters
+  while (has_data(first_line)) {
+    // read operand name
+    first_line >> tmp;
+//?     cerr << "0: " << tmp << '\n';
+    if (tmp == "[") break;
+    if (tmp == "->") break;
+    assert(tmp != ":");
+    curr.in.push_back(operand(tmp));
+
+    // skip ':'
+    assert(has_data(first_line));
+    first_line >> tmp;
+//?     cerr << "1: " << tmp << '\n';
+    assert(tmp == ":");  // types are required in function headers
+
+    // read operand type
+    assert(has_data(first_line));
+    curr.in.back().set_type(first_line);
+  }
+  // read in-out parameters
+  while (tmp != "[" && has_data(first_line)) {
+    // read operand name
+    first_line >> tmp;
+//?     cerr << "inout 0: " << tmp << '\n';
+    if (tmp == "[") break;
+    assert(tmp != "->");
+    assert(tmp != ":");  // types are required in function headers
+    curr.in_out.push_back(operand(tmp));
+
+    // skip ':'
+    assert(has_data(first_line));
+    first_line >> tmp;
+//?     cerr << "inout 1: " << tmp << '\n';
+    assert(tmp == ":");
+
+    // read operand type
+    // (the type belongs to the in-out operand just pushed, not to the last 'in' operand)
+    assert(has_data(first_line));
+    curr.in_out.back().set_type(first_line);
+  }
+  trace(99, "parse") << "  " << SIZE(curr.in) << " in operands" << end();
+  trace(99, "parse") << "  " << SIZE(curr.in_out) << " in_out operands" << end();
+  // not bothering checking for tokens past '[' in first_line
+
+  // read instructions
+  while (has_data(in)) {
+    string line_data;
+    getline(in, line_data);
+    const char first = first_non_whitespace(line_data);
+    if (first == ']') break;
+    if (first == '\0' || first == '#') continue;  // blank or comment-only line; nothing to parse
+//?     bool has_in_out = (line_data.find("<-") != string::npos);
+    // split the line into whitespace-separated words, noting whether it contains '<-'
+    istringstream line(line_data);
+    vector<string> words;
+    bool has_in_out = false;
+    while (has_data(line)) {
+      string w;
+      line >> w;
+      words.push_back(w);
+      if (w == "<-")
+        has_in_out = true;
+    }
+    instruction inst;
+    int i = 0;
+    assert(i < SIZE(words));
+    if (has_in_out) {
+      // everything before '<-' is an in-out operand, each optionally followed by ': type'
+      while (i < SIZE(words)) {
+//?         cerr << "in-out operand: " << i << ' ' << words.at(i) << '\n';
+        inst.in_out.push_back(operand(words.at(i)));
+        ++i;
+        assert(i < SIZE(words));
+        if (words.at(i) == ":") {
+          ++i;  // skip ':'
+          assert(i < SIZE(words));
+          assert(words.at(i) != "<-");
+          assert(words.at(i) != ":");
+          istringstream tmp(words.at(i));
+//?           cerr << "setting type to " << i << ' ' << words.at(i) << '\n';
+          inst.in_out.back().set_type(tmp);
+//?           cerr << "done\n";
+          ++i;
+          assert(i < SIZE(words));
+        }
+        if (words.at(i) == "<-") break;
+      }
+      assert(i < SIZE(words));
+      assert(words.at(i) == "<-");
+      ++i;
+    }
+    // next word is the instruction name
+    assert(i < SIZE(words));
+    assert(words.at(i) != "<-");
+    assert(words.at(i) != ":");
+    inst.name = words.at(i);
+    ++i;
+    // remaining words are 'in' operands, each optionally followed by ': type'
+    while (i < SIZE(words)) {
+      inst.in.push_back(operand(words.at(i)));
+      ++i;
+      if (i < SIZE(words) && words.at(i) == ":") {
+        ++i;  // skip ':'
+        assert(i < SIZE(words));
+        assert(words.at(i) != "<-");
+        assert(words.at(i) != ":");
+        istringstream tmp(words.at(i));
+        inst.in.back().set_type(tmp);
+        ++i;
+      }
+      else if (is_integer(inst.in.back().name)) {
+        // untyped integer operands are literals
+        inst.in.back().type.push_back(Literal_type_id);
+      }
+    }
+    trace(99, "parse") << "instruction: " << inst.name << end();
+    for (int j = 0;  j < SIZE(inst.in);  ++j)
+      trace(99, "parse") << "  in => " << to_string(inst.in.at(j)) << end();
+    for (int j = 0;  j < SIZE(inst.in_out);  ++j)
+      trace(99, "parse") << "  in_out => " << to_string(inst.in_out.at(j)) << end();
+    curr.instructions.push_back(inst);
+  }
+}
+
+// Register a new function called 'name': allocate the next function id, map
+// the name to it in Function_id, and create its entry in Function_info.
+// Asserts that neither the name nor the id is already in use.
+// Returns a reference to the new entry, with its id and name filled in.
+function_info& new_function(string name) {
+  assert(!contains_key(Function_id, name));
+  const int fresh_id = Next_function_id;
+  ++Next_function_id;
+  put(Function_id, name, fresh_id);
+  assert(!contains_key(Function_info, fresh_id));
+  function_info& info = Function_info[fresh_id];  // operator[] inserts the entry
+  info.name = name;
+  info.id = fresh_id;
+  return info;
+}
+
+// Return the first character of 'in' that isn't whitespace, or '\0' if there
+// is none (including when 'in' is empty).
+char first_non_whitespace(string in) {
+  for (string::const_iterator p = in.begin();  p != in.end();  ++p) {
+    // isspace() is undefined for negative arguments other than EOF, so bytes
+    // >= 0x80 (e.g. from UTF-8 text) must be converted to unsigned char first
+    if (!isspace(static_cast<unsigned char>(*p))) return *p;
+  }
+  return '\0';
+}
+
+// Does 's' look like a decimal integer: an optional leading '-' followed by
+// one or more digits and nothing else?
+bool is_integer(const string& s) {
+  // a '-' is only allowed at the very first position
+  const size_t first_digit = (!s.empty() && s[0] == '-') ? 1 : 0;
+  if (first_digit == s.size()) return false;  // need at least one digit
+  for (size_t i = first_digit;  i < s.size();  ++i) {
+    if (s[i] < '0' || s[i] > '9') return false;  // no other characters
+  }
+  return true;
+}
+
+// Convert 'n' to an int. Base 0 lets strtoll() auto-detect the base (decimal,
+// 0x-prefixed hex, 0-prefixed octal).
+// Prints a message to cerr and then fails an assert if 'n' has characters left
+// over after the number, or if the parsed value doesn't fit in an int.
+int to_integer(string n) {
+  char* end = NULL;
+  // safe because string.c_str() is guaranteed to be null-terminated
+  long long parsed = strtoll(n.c_str(), &end, /*any base*/0);
+  if (*end != '\0') cerr << "tried to convert " << n << " to number\n";
+  assert(*end == '\0');
+  // strtoll() yields a long long; check that narrowing to int round-trips
+  // rather than silently returning a different number
+  int result = static_cast<int>(parsed);
+  if (result != parsed) cerr << "number " << n << " doesn't fit in an int\n";
+  assert(result == parsed);
+  return result;
+}
+
+// Unit test for is_integer(): digit strings with an optional leading '-' are
+// accepted; anything else is rejected.
+void test_is_integer() {
+  // accepted
+  CHECK(is_integer("1234"));
+  CHECK(is_integer("-1"));
+  CHECK(is_integer("-567"));
+  // rejected
+  CHECK(!is_integer("234.0"));
+  CHECK(!is_integer("89-0"));
+  CHECK(!is_integer("-"));
+  CHECK(!is_integer("1e3"));  // not supported
+}
author	Kartik Agaram <vc@akkartik.com>	2018-09-17 22:57:10 -0700
committer	Kartik Agaram <vc@akkartik.com>	2018-09-17 22:57:58 -0700
commit	f09280141f18fbe8cef0ed576cf932e12e315666 (patch)
tree	d00962b07cb013f89d4fdb2fcf19c392afb62b5c /transect/011load.cc
parent	0a7b03727a736f73c16d37b22afef8496c60d657 (diff)
download	mu-f09280141f18fbe8cef0ed576cf932e12e315666.tar.gz