https://github.com/akkartik/mu/blob/master/025compare.cc
  1 //: Comparison primitives
  2 
  3 :(before "End Primitive Recipe Declarations")
  4 EQUAL,
  5 :(before "End Primitive Recipe Numbers")
  6 put(Recipe_ordinal, "equal", EQUAL);
  7 :(before "End Primitive Recipe Checks")
  8 case EQUAL: {  // check-time validation of an 'equal' instruction: ingredient count, ingredient types, product count and product type
  9   if (SIZE(inst.ingredients) <= 1) {
 10     raise << maybe(get(Recipe, r).name) << "'equal' needs at least two ingredients to compare in '" << to_original_string(inst) << "'\n" << end();
 11     break;
 12   }
 13   const reagent& exemplar = inst.ingredients.at(0);  // every other ingredient is type-checked against the first one
 14   for (int i = /*skip exemplar*/1;  i < SIZE(inst.ingredients);  ++i) {
 15     if (!types_match(inst.ingredients.at(i), exemplar) && !types_match(exemplar, inst.ingredients.at(i))) {  // an error only if the types match in neither direction
 16       raise << maybe(get(Recipe, r).name) << "'equal' expects ingredients to be all of the same type, but got '" << to_original_string(inst) << "'\n" << end();
 17       goto finish_checking_instruction;  // 'break' would only leave the for loop; the label is not defined in this fragment
 18     }
 19   }
 20   if (SIZE(inst.products) > 1) {
 21     raise << maybe(get(Recipe, r).name) << "'equal' yields exactly one product in '" << to_original_string(inst) << "'\n" << end();
 22     break;
 23   }
 24   if (!inst.products.empty() && !is_dummy(inst.products.at(0)) && !is_mu_boolean(inst.products.at(0))) {  // a missing or dummy product is accepted without a type check
 25     raise << maybe(get(Recipe, r).name) << "'equal' should yield a boolean, but got '" << inst.products.at(0).original_string << "'\n" << end();
 26     break;
 27   }
 28   break;
 29 }
 30 :(before "End Primitive Recipe Implementations")
 31 case EQUAL: {  // run-time 'equal': true only if every ingredient has the same contents as the first
 32   vector<double>& exemplar = ingredients.at(0);
 33   bool result = true;
 34   for (int i = /*skip exemplar*/1;  i < SIZE(ingredients);  ++i) {
 35     if (SIZE(ingredients.at(i)) != SIZE(exemplar)) {  // different lengths can never be equal; also keeps the range comparison below within exemplar's bounds
 36       result = false;
 37       break;
 38     }
 39     if (!equal(ingredients.at(i).begin(), ingredients.at(i).end(), exemplar.begin())) {  // element-by-element comparison against the first ingredient
 40       result = false;
 41       break;
 42     }
 43   }
 44   products.resize(1);
 45   products.at(0).push_back(result);  // the single product holds the boolean outcome
 46   break;
 47 }
 48 
 49 :(scenario equal)
 50 def main [
 51   1:num <- copy 34
 52   2:num <- copy 33
 53   3:bool <- equal 1:num, 2:num
 54 ]
 55 +mem: location 1 is 34
 56 +mem: location 2 is 33
 57 +mem: storing 0 in location 3
 58 
 59 :(scenario equal_2)
 60 def main [
 61   1:num <- copy 34
 62   2:num <- copy 34
 63   3:bool <- equal 1:num, 2:num
 64 ]
 65 +mem: location 1 is 34
 66 +mem: location 2 is 34
 67 +mem: storing 1 in location 3
 68 
 69 :(scenario equal_multiple)
 70 def main [
 71   1:bool <- equal 34, 34, 34
 72 ]
 73 +mem: storing 1 in location 1
 74 
 75 :(scenario equal_multiple_2)
 76 def main [
 77   1:bool <- equal 34, 34, 35
 78 ]
 79 +mem: storing 0 in location 1
 80 
 81 :(before "End Primitive Recipe Declarations")
 82 NOT_EQUAL,
 83 :(before "End Primitive Recipe Numbers")
 84 put(Recipe_ordinal, "not-equal", NOT_EQUAL);
 85 :(before "End Primitive Recipe Checks")
 86 case NOT_EQUAL: {  // check-time validation of a 'not-equal' instruction; diagnostics name 'not-equal' so they point at the right primitive
 87   if (SIZE(inst.ingredients) != 2) {  // unlike 'equal', exactly two ingredients are required
 88     raise << maybe(get(Recipe, r).name) << "'not-equal' needs two ingredients to compare in '" << to_original_string(inst) << "'\n" << end();
 89     break;
 90   }
 91   const reagent& exemplar = inst.ingredients.at(0);
 92   if (!types_match(inst.ingredients.at(1), exemplar) && !types_match(exemplar, inst.ingredients.at(1))) {  // an error only if the types match in neither direction
 93     raise << maybe(get(Recipe, r).name) << "'not-equal' expects ingredients to be all of the same type, but got '" << to_original_string(inst) << "'\n" << end();
 94     goto finish_checking_instruction;  // label is not defined in this fragment
 95   }
 96   if (SIZE(inst.products) > 1) {
 97     raise << maybe(get(Recipe, r).name) << "'not-equal' yields exactly one product in '" << to_original_string(inst) << "'\n" << end();
 98     break;
 99   }
100   if (!inst.products.empty() && !is_dummy(inst.products.at(0)) && !is_mu_boolean(inst.products.at(0))) {  // a missing or dummy product is accepted without a type check
101     raise << maybe(get(Recipe, r).name) << "'not-equal' should yield a boolean, but got '" << inst.products.at(0).original_string << "'\n" << end();
102     break;
103   }
104   break;
105 }
106 :(before "End Primitive Recipe Implementations")
107 case NOT_EQUAL: {  // run-time 'not-equal': true if the two ingredients differ in length or in any element
108   vector<double>& exemplar = ingredients.at(0);
109   products.resize(1);
110   if (SIZE(ingredients.at(1)) != SIZE(exemplar)) {  // different lengths: unequal without looking at the elements
111     products.at(0).push_back(true);
112     break;
113   }
114   bool equal_ingredients = equal(ingredients.at(1).begin(), ingredients.at(1).end(), exemplar.begin());  // lengths already known to match here
115   products.at(0).push_back(!equal_ingredients);
116   break;
117 }
118 
119 :(scenario not_equal)
120 def main [
121   1:num <- copy 34
122   2:num <- copy 33
123   3:bool <- not-equal 1:num, 2:num
124 ]
125 +mem: location 1 is 34
126 +mem: location 2 is 33
127 +mem: storing 1 in location 3
128 
129 :(scenario not_equal_2)
130 def main [
131   1:num <- copy 34
132   2:num <- copy 34
133   3:bool <- not-equal 1:num, 2:num
134 ]
135 +mem: location 1 is 34
136 +mem: location 2 is 34
137 +mem: storing 0 in location 3
138 
139 :(before "End Primitive Recipe Declarations")
140 GREATER_THAN,
141 :(before "End Primitive Recipe Numbers")
142 put(Recipe_ordinal, "greater-than", GREATER_THAN);
143 :(before "End Primitive Recipe Checks")
144 case GREATER_THAN: {  // check-time validation of a 'greater-than' instruction: ingredient count, numeric ingredients, product count and product type
145   if (SIZE(inst.ingredients) <= 1) {
146     raise << maybe(get(Recipe, r).name) << "'greater-than' needs at least two ingredients to compare in '" << to_original_string(inst) << "'\n" << end();
147     break;
148   }
149   for (int i = 0;  i < SIZE(inst.ingredients);  ++i) {  // every ingredient, including the first, must be a number
150     if (!is_mu_number(inst.ingredients.at(i))) {
151       raise << maybe(get(Recipe, r).name) << "'greater-than' can only compare numbers; got '" << inst.ingredients.at(i).original_string << "'\n" << end();
152       goto finish_checking_instruction;  // 'break' would only leave the for loop; the label is not defined in this fragment
153     }
154   }
155   if (SIZE(inst.products) > 1) {
156     raise << maybe(get(Recipe, r).name) << "'greater-than' yields exactly one product in '" << to_original_string(inst) << "'\n" << end();
157     break;
158   }
159   if (!inst.products.empty() && !is_dummy(inst.products.at(0)) && !is_mu_boolean(inst.products.at(0))) {  // a missing or dummy product is accepted without a type check
160     raise << maybe(get(Recipe, r).name) << "'greater-than' should yield a boolean, but got '" << inst.products.at(0).original_string << "'\n" << end();
161     break;
162   }
163   break;
164 }
165 :(before "End Primitive Recipe Implementations")
166 case GREATER_THAN: {  // run-time 'greater-than': true only if each ingredient is strictly greater than the one after it
167   bool result = true;
168   for (int i = /**/1;  i < SIZE(ingredients);  ++i) {  // compares adjacent pairs (i-1, i); the loop keeps going even after result becomes false
169     if (ingredients.at(i-1).at(0) <= ingredients.at(i).at(0)) {  // only the first element of each ingredient is compared
170       result = false;
171     }
172   }
173   products.resize(1);
174   products.at(0).push_back(result);  // the single product holds the boolean outcome
175   break;
176 }
177 
178 :(scenario greater_than)
179 def main [
180   1:num <- copy 34
181   2:num <- copy 33
182   3:bool <- greater-than 1:num, 2:num
183 ]
184 +mem: storing 1 in location 3
185 
186 :(scenario greater_than_2)
187 def main [
188   1:num <- copy 34
189   2:num <- copy 34
190   3:bool <- greater-than 1:num, 2:num
191 ]
192 +mem: storing 0 in location 3
193 
194 :(scenario greater_than_multiple)
195 def main [
196   1:bool <- greater-than 36, 35, 34
197 ]
198 +mem: storing 1 in location 1
199 
200 :(scenario greater_than_multiple_2)
201 def main [
202   1:bool <- greater-than 36, 35, 35
203 ]
204 +mem: storing 0 in location 1
205 
206 :(before "End Primitive Recipe Declarations")
207 LESSER_THAN,
208 :(before "End Primitive Recipe Numbers")
209 put(Recipe_ordinal, "lesser-than", LESSER_THAN);
210 :(before "End Primitive Recipe Checks")
211 case LESSER_THAN: {
212   if (SIZE(inst.ingredients) <= 1) {
213     raise << maybe(get(Recipe, r).name) << "'lesser-than' needs at least two ingredients to compare in '" << to_original_string(inst) << "'\n" << end();
214     break;
215   }
216   for (int i = 0;  i < SIZE(inst.ingredients);  ++i) {
217     if (!is_mu_number(inst.ingredients.at(i))) {
218       raise << maybe(get(Recipe, r).name) << "'lesser-than' can only compare numbers; got '" << inst.ingredients.at(i).original_string << "'\n" << end();
219       goto finish_checking_instruction;
220     
//: Allow instructions to mention literals directly.
//:
//: This layer will transparently move them to the global segment (assumed to
//: always be the second segment).

// A string-literal operand in the code segment should be replaced by a
// generated global name, with the string's contents traced as the value of
// that new global.
void test_transform_literal_string() {
  run(
      "== code\n"
      "b8/copy  \"test\"/imm32\n"
      "== data\n"  // need to manually create the segment for now
  );
  CHECK_TRACE_CONTENTS(
      "transform: -- move literal strings to data segment\n"
      "transform: adding global variable '__subx_global_1' containing \"test\"\n"
      "transform: instruction after transform: 'b8 __subx_global_1'\n"
  );
}

//: We don't rely on any transforms running in previous layers, but this layer
//: knows about labels and global variables and will emit them for previous
//: layers to transform.
:(after "Begin Transforms")
// Begin Level-3 Transforms
// Register the literal-string transform in the Transform list.
Transform.push_back(transform_literal_strings);
// End Level-3 Transforms

:(before "End Globals")
// Suffix for the next auto-generated global name ("__subx_global_<n>");
// incremented each time transform_literal_strings creates a global.
int Next_auto_global = 1;
:(code)
// Replace every string-literal word in the first segment of `p` with the name
// of a freshly generated global ("__subx_global_<n>"), and append the
// definitions of those globals to the second segment, creating that segment
// if `p` has fewer than two.
//
// A word counts as a string literal when its data starts with a double quote.
// Does nothing beyond tracing when `p` has no segments.
void transform_literal_strings(program& p) {
  trace(3, "transform") << "-- move literal strings to data segment" << end();
  if (p.segments.empty()) return;
  // Definitions are collected here first and merged into `p` at the end.
  segment new_globals;
  vector<line>& instructions = p.segments.at(0).lines;
  for (int i = 0;  i < SIZE(instructions);  ++i) {
    line& inst = instructions.at(i);
    for (int j = 0;  j < SIZE(inst.words);  ++j) {
      word& curr = inst.words.at(j);
      // A word with empty data cannot be a string literal; checking first
      // keeps at(0) from throwing std::out_of_range on it.
      if (curr.data.empty() || curr.data.at(0) != '"') continue;
      ostringstream global_name;
      global_name << "__subx_global_" << Next_auto_global;
      ++Next_auto_global;
      add_global_to_data_segment(global_name.str(), curr, new_globals);
      curr.data = global_name.str();
    }
    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
  }
  if (new_globals.lines.empty()) return;
  // resize() may reallocate the segments; `instructions` is not used past here.
  if (SIZE(p.segments) < 2) p.segments.resize(2);
  vector<line>& existing_data = p.segments.at(1).lines;
  existing_data.insert(existing_data.end(), new_globals.lines.begin(), new_globals.lines.end());
}

void add_global_to_data_segment(const string& name, const word& value, segment& data) {
  trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
  // emit label
  data.lines.push_back(label(name));
  // emit size for size-prefixed array
  data.lines.push_back(line());
  emit_hex_bytes(data.lines.back(), SIZE(value.data)-/*skip quotes*/2, 4/*bytes*/);
  // emit data byte by byte
  data.lines.push_back(line());
  line& curr = data.lines.back();
  for (int i = /*skip start quote*/1;  i < SIZE(value.data)-/*skip end quote*/1;  ++i) {
    char c = value.data.at(i);
    curr.words.push_back(word());
    curr.words.back().data = hex_byte_to_string(c);
    curr.words.back().metadata.push_back(string(1, c));
  }
}

//: Within strings, whitespace is significant. So we need to redo our instruction
//: parsing.

// A double-quoted string should come through as a single word with its
// internal whitespace intact.
void test_instruction_with_string_literal() {
  parse_instruction_character_by_character(
      "a \"abc  def\" z\n"  // two spaces inside string
  );
  CHECK_TRACE_CONTENTS(
      "parse2: word: a\n"
      "parse2: word: \"abc  def\"\n"
      "parse2: word: z\n"
  );
  // no other words
  CHECK_TRACE_COUNT("parse2", 3);
}

:(before "End Line Parsing Special-cases(line_data -> l)")
// Any line containing a double quote is handed to the character-level parser,
// which appends its result to `l`; the rest of the enclosing loop body is
// skipped for that line.
if (line_data.find('"') != string::npos) {  // can cause false-positives, but we can handle them
  parse_instruction_character_by_character(line_data, l);
  continue;
}

:(code)
// Parse one line of source text into words, one character at a time, so that
// whitespace inside double-quoted string literals is preserved.
//
// On success the parsed line (with `original` set to line_data) is appended to
// `out`, unless it produced no words. Parsing stops at a token that starts
// with '#' or ':'. Raises an error and returns without appending anything
// when line_data contains a newline anywhere but at its very end, or when a
// string literal contains an escape other than \n, \" or \\.
void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
  // a single trailing newline is tolerated; any earlier newline is an error
  if (line_data.find('\n') != string::npos  && line_data.find('\n') != line_data.size()-1) {
    raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
    return;
  }
  // parse literals
  istringstream in(line_data);
  // without this, `in >> c` below would skip over whitespace inside strings
  in >> std::noskipws;
  line result;
  result.original = line_data;
  // add tokens (words or strings) one by one
  while (has_data(in)) {
    skip_whitespace(in);
    if (!has_data(in)) break;
    char c = in.get();
    if (c == '#') break;  // comment; drop rest of line
    if (c == ':') break;  // line metadata; skip for now
    if (c == '.') {
      if (!has_data(in)) break;  // comment token at end of line
      if (isspace(in.peek()))
        continue;  // '.' followed by space is comment token; skip
    }
    // The word is added before its contents are read; if an error below makes
    // us return early, `result` is simply never appended to `out`.
    result.words.push_back(word());
    if (c == '"') {
      // string literal; slurp everything between quotes into data
      ostringstream d;
      d << c;  // the opening quote is kept as part of data
      // NOTE(review): if input runs out before a closing quote, no error is
      // raised and the partial text is kept -- confirm this is intended.
      while (has_data(in)) {
        in >> c;
        if (c == '\\') {
          // escape sequence: translate the character after the backslash
          in >> c;
          if (c == 'n') d << '\n';
          else if (c == '"') d << '"';
          else if (c == '\\') d << '\\';
          else {
            raise << "parse_instruction_character_by_character: unknown escape sequence '\\" << c << "'\n" << end();
            return;
          }
          // skip the closing-quote test so an escaped quote doesn't end the string
          continue;
        } else {
          d << c;
        }
        if (c == '"') break;  // closing quote (already appended to data)
      }
      result.words.back().data = d.str();
      // slurp metadata
      // Everything from the closing quote up to the next whitespace is split
      // on '/' into metadata entries; empty entries are dropped.
      ostringstream m;
      while (!isspace(in.peek()) && has_data(in)) {  // peek can sometimes trigger eof(), so do it first
        in >> c;
        if (c == '/') {
          if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
          m.str("");
        }
        else {
          m << c;
        }
      }
      if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
    }
    else {
      // not a string literal; slurp all characters until whitespace
      ostringstream w;
      w << c;
      while (!isspace(in.peek()) && has_data(in)) {  // peek can sometimes trigger eof(), so do it first
        in >> c;
        w << c;
      }
      // splitting the token into data and metadata is left to parse_word
      parse_word(w.str(), result.words.back());
    }
    trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
  }
  if (!result.words.empty())
    out.push_back(result);
}

// Consume whitespace from the front of `in`, stopping at the first
// non-whitespace character or as soon as has_data() reports nothing left.
void skip_whitespace(istream& in) {
  while (has_data(in) && isspace(in.peek()))
    in.get();
}

// If the next character of `in` is '#', consume it and everything after it up
// to (but not including) the next newline. Otherwise leave `in` untouched.
void skip_comment(istream& in) {
  if (!has_data(in)) return;
  if (in.peek() != '#') return;
  in.get();  // the '#' itself
  for (;;) {
    if (!has_data(in)) break;
    if (in.peek() == '\n') break;  // the newline stays in the stream
    in.get();
  }
}

// helper for tests
void parse_instruction_character_by_character(const string& line_data) {
  vector<line> out;
  parse_instruction_character_by_character(line_data, out);
}

// A standalone '.' between two words is a comment token and yields no word.
void test_parse2_comment_token_in_middle() {
  parse_instruction_character_by_character(
      "a . z\n"
  );
  CHECK_TRACE_CONTENTS(
      "parse2: word: a\n"
      "parse2: word: z\n"
  );
  CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
  // no other words
  CHECK_TRACE_COUNT("parse2", 2);
}

// A '.' immediately followed by a non-space character starts an ordinary word.
void test_parse2_word_starting_with_dot() {
  parse_instruction_character_by_character(
      "a .b c\n"
  );
  CHECK_TRACE_CONTENTS(
      "parse2: word: a\n"
      "parse2: word: .b\n"
      "parse2: word: c\n"
  );
}

// A standalone '.' at the start of the line is skipped; the words after it
// are still parsed.
void test_parse2_comment_token_at_start() {
  parse_instruction_character_by_character(
      ". a b\n"
  );
  CHECK_TRACE_CONTENTS(
      "parse2: word: a\n"
      "parse2: word: b\n"
  );
  CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
}

// A standalone '.' as the last token on the line is skipped.
void test_parse2_comment_token_at_end() {
  parse_instruction_character_by_character(
      "a b .\n"
  );
  CHECK_TRACE_CONTENTS(
      "parse2: word: a\n"
      "parse2: word: b\n"
  );
  CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
}

// A word beginning with '.' is kept as a word even when it is the first token
// on the line.
void test_parse2_word_starting_with_dot_at_start() {
  parse_instruction_character_by_character(
      ".a b c\n"
  );
  CHECK_TRACE_CONTENTS(
      "parse2: word: .a\n"
      "parse2: word: b\n"
      "parse2: word: c\n"
  );
}

// Text after a '/' in a plain (non-string) word shows up as metadata in the
// traced word.
void test_parse2_metadata() {
  parse_instruction_character_by_character(
      ".a b/c d\n"
  );
  CHECK_TRACE_CONTENTS(
      "parse2: word: .a\n"
      "parse2: word: b /c\n"
      "parse2: word: d\n"
  );
}

void test_parse2_string_with_metadata() {
  parse_instruction_character_by_character(
      "a \"bc  def\"/disp32 g\n"
  );
  CHECK_TRACE_CONTENTS(
      "parse2: word: a\n"
      "parse2: word: \"bc  def\" /disp32\n"