//: 054dilated_reagent.cc

//: An alternative syntax for reagents that permits whitespace in properties,
//: grouped by brackets. We'll use this ability in the next layer, when we
//: generalize types from lists to trees of properties.

:(scenarios load)
:(scenario dilated_reagent)
recipe main [
  {1: number, foo: bar} <- copy 34
]
+parse:   product: {"1": "number", "foo": "bar"}

:(scenario load_trailing_space_after_curly_bracket)
recipe main [
  # line below has a space at the end
  { 
]
# successfully parsed

//: First augment next_word to group balanced brackets together.

:(before "End next_word Special-cases")
// A word that starts with a bracket extends to the matching close bracket.
// Parens always start such a group. Curlies are treated mostly like parens,
// but only when start_of_dilated_reagent() accepts them, so that we don't
// mess up labels.
if (in.peek() == '(' || start_of_dilated_reagent(in))
  return slurp_balanced_bracket(in);

:(code)
// Decide whether the '{' at the front of 'in' opens a dilated reagent.
//
// A curly is considered a label if it's the last thing on a line; here that
// also covers a curly that is the last thing in the input. Dilated reagents
// should remain all on one line.
//
// Returns false right away if the next character isn't '{'. Otherwise looks
// past the '{' (and whatever skip_whitespace_but_not_newline() consumes),
// rewinds 'in' back to the '{', and returns true unless the lookahead hit a
// newline or the end of the input.
bool start_of_dilated_reagent(istream& in) {
  if (in.peek() != '{') return false;
  // remember where the '{' is, in the stream's own position type
  const istream::pos_type pos = in.tellg();
  in.get();  // slurp '{'
  skip_whitespace_but_not_newline(in);
  // Keep peek()'s int result: narrowing it to char would make end-of-input
  // look like an ordinary (non-newline) character.
  const int next = in.peek();
  // Looking past the end of the input leaves eofbit (and possibly failbit)
  // set, and seekg() does nothing on a failed stream. The stream was readable
  // on entry (we just peeked a '{'), so reset its state before rewinding.
  in.clear();
  in.seekg(pos);
  return next != '\n' && next != istream::traits_type::eof();
}

// Read one bracketed group from 'in' and return it verbatim.
//
// Precondition: the first character read is an open bracket. Characters are
// copied to the result until the bracket that closes it has been read.
// The three bracket families {} () and [] are balanced against each other.
// A backslash and the character right after it are copied through without
// being inspected, so an escaped bracket never affects the balance.
// Reading also stops, without complaint, when the input runs out.
string slurp_balanced_bracket(istream& in) {
  ostringstream out;
  list<char> pending;  // brackets opened but not yet closed, innermost last
  char curr;
  while (in >> curr) {
    out << curr;
    if (curr == '\\') {
      // the escaped character is copied blindly, whatever it is
      if (!(in >> curr)) break;
      out << curr;
      continue;
    }
    switch (curr) {
      case '(':
      case '[':
      case '{':
        pending.push_back(curr);
        break;
      case ')':
        assert(pending.back() == '(');
        pending.pop_back();
        break;
      case ']':
        assert(pending.back() == '[');
        pending.pop_back();
        break;
      case '}':
        assert(pending.back() == '{');
        pending.pop_back();
        break;
      default:
        break;
    }
    // everything that was opened has been closed: the group is complete
    if (pending.empty()) break;
  }
  return out.str();
}

:(after "Parsing reagent(string s)")
// Dilated reagent: 's' starts with '{' and holds a series of key/value pairs,
// e.g. {1: number, foo: bar}. Each pair is appended to 'properties'; the
// first pair then supplies the reagent's name (its key) and its type (its
// value). This branch handles the whole string and returns.
if (s.at(0) == '{') {
  assert(properties.empty());
  istringstream in(s);
  // keep whitespace visible to the readers below (slurp_balanced_bracket
  // copies characters with 'in >> c', which would otherwise drop it)
  in >> std::noskipws;
  in.get();  // skip '{'
  while (has_data(in)) {
    // slurp_key() strips trailing ':' from the key and skips the whitespace
    // and ':' that follow it
    string key = slurp_key(in);
    // nothing was read this round; go back to the loop condition
    if (key.empty()) continue;
    // the closing bracket of the reagent is not a property
    if (key == "}") continue;
    // NOTE(review): raw 'new'; the pointer is stored in 'properties' below --
    // presumably whoever owns 'properties' frees it; confirm.
    string_tree* value = new string_tree(next_word(in));
    // End Parsing Reagent Property(value)
    properties.push_back(pair<string, string_tree*>(key, value));
  }
  // structures for the first row of properties
  // NOTE(review): at(0) presumably throws if no pair was parsed (e.g. for
  // "{}") -- confirm that callers never pass such a string.
  name = properties.at(0).first;
  string type_name = properties.at(0).second->value;
  // a type name seen for the first time is assigned the next free ordinal
  if (!contains_key(Type_ordinal, type_name)) {
      // this type can't be an integer literal
    put(Type_ordinal, type_name, Next_type_ordinal++);
  }
  type = new type_tree(get(Type_ordinal, type_name));
  return;
}

:(code)
// Read the next property key from 'in'.
// The key is whatever next_word() yields, minus any ':' characters at its
// end. Before returning, also consume the run of whitespace and ':' that
// follows the key in the stream.
string slurp_key(istream& in) {
  string key = next_word(in);
  // peel trailing colons off the word itself
  while (!key.empty() && key[key.size()-1] == ':')
    strip_last(key);
  // then discard the separator after it
  for (;;) {
    if (!isspace(in.peek()) && in.peek() != ':') break;
    in.get();
  }
  return key;
}