1 //: Extend 'new' to handle a unicode string literal argument or 'text'.
  2 
  3 //: A Mu text is an address to an array of characters.
  4 :(before "End Mu Types Initialization")
  5 put(Type_abbreviations, "text", new_type_tree("address:array:character"));
  6 
  7 :(scenario new_string)
  8 def main [
  9   1:text <- new [abc def]
 10   2:char <- index *1:text, 5
 11 ]
 12 # number code for 'e'
 13 +mem: storing 101 in location 2
 14 
 15 :(scenario new_string_handles_unicode)
 16 def main [
 17   1:text <- new [a«c]
 18   2:num <- length *1:text
 19   3:char <- index *1:text, 1
 20 ]
 21 +mem: storing 3 in location 2
 22 # unicode for '«'
 23 +mem: storing 171 in location 3
 24 
 25 :(before "End NEW Check Special-cases")
 26 if (is_literal_text(inst.ingredients.at(0))) break;
 27 :(before "Convert 'new' To 'allocate'")
 28 if (inst.name == "new" && !inst.ingredients.empty() && is_literal_text(inst.ingredients.at(0))) continue;
 29 :(after "case NEW" following "Primitive Recipe Implementations")
 30   if (is_literal_text(current_instruction().ingredients.at(0))) {
 31   ¦ products.resize(1);
 32   ¦ products.at(0).push_back(new_mu_text(current_instruction().ingredients.at(0).name));
 33   ¦ trace(9999, "mem") << "new string alloc: " << products.at(0).at(0) << end();
 34   ¦ break;
 35   }
 36 
 37 :(code)
 38 int new_mu_text(const string& contents) {
 39   // allocate an array just large enough for it
 40   int string_length = unicode_length(contents);
 41 //?   Total_alloc += string_length+1;
 42 //?   ++Num_alloc;
 43   int result = allocate(string_length+/*array length*/1);
 44   trace(9999, "mem") << "storing string refcount 0 in location " << result << end();
 45   put(Memory, result, 0);
 46   int curr_address = result+/*skip refcount*/1;
 47   trace(9999, "mem") << "storing string length " << string_length << " in location " << curr_address << end();
 48   put(Memory, curr_address, string_length);
 49   ++curr_address;  // skip length
 50   int curr = 0;
 51   const char* raw_contents = contents.c_str();
 52   for (int i = 0;  i < string_length;  ++i) {
 53   ¦ uint32_t curr_character;
 54   ¦ assert(curr < SIZE(contents));
 55   ¦ tb_utf8_char_to_unicode(&curr_character, &raw_contents[curr]);
 56   ¦ trace(9999, "mem") << "storing string character " << curr_character << " in location " << curr_address << end();
 57   ¦ put(Memory, curr_address, curr_character);
 58   ¦ curr += tb_utf8_char_length(raw_contents[curr]);
 59   ¦ ++curr_address;
 60   }
 61   // Mu strings are not null-terminated in memory.
 62   return result;
 63 }
 64 
 65 //: a new kind of typo
 66 
 67 :(scenario string_literal_without_instruction)
 68 % Hide_errors = true;
 69 def main [
 70   [abc]
 71 ]
 72 +error: main: instruction '[abc]' has no recipe in '[abc]'
 73 
 74 //: stash recognizes strings
 75 
 76 :(scenario stash_string)
 77 def main [
 78   1:text <- new [abc]
 79   stash [foo:], 1:text
 80 ]
 81 +app: foo: abc
 82 
 83 :(before "End inspect Special-cases(r, data)")
 84 if (is_mu_text(r)) {
 85   assert(scalar(data));
 86   return read_mu_text(data.at(0));
 87 }
 88 
 89 :(before "End $print Special-cases")
 90 else if (is_mu_text(current_instruction().ingredients.at(i))) {
 91   cout << read_mu_text(ingredients.at(i).at(0));
 92 }
 93 
 94 :(scenario unicode_string)
 95 def main [
 96   1:text <- new [♠]
 97   stash [foo:], 1:text
 98 ]
 99 +app: foo: ♠
100 
101 :(scenario stash_space_after_string)
102 def main [
103   1:text <- new [abc]
104   stash 1:text, [foo]
105 ]
106 +app: abc foo
107 
108 :(scenario stash_string_as_array)
109 def main [
110   1:text <- new [abc]
111   stash *1:text
112 ]
113 +app: 3 97 98 99
114 
115 //: fixes way more than just stash
116 :(before "End Preprocess is_mu_text(reagent x)")
117 if (!canonize_type(x)) return false;
118 
119 //: Allocate more to routine when initializing a literal string
120 :(scenario new_string_overflow)
121 % Initial_memory_per_routine = 3;
122 def main [
123   1:address:num/raw <- new number:type
124   2:text/raw <- new [a]  # not enough room in initial page, if you take the refcount and array length into account
125 ]
126 +new: routine allocated memory from 1000 to 1003
127 +new: routine allocated memory from 1003 to 1006
128 
129 //: helpers
130 :(code)
131 int unicode_length(const string& s) {
132   const char* in = s.c_str();
133   int result = 0;
134   int curr = 0;
135   while (curr < SIZE(s)) {  // carefully bounds-check on the string
136   ¦ // before accessing its raw pointer
137   ¦ ++result;
138   ¦ curr += tb_utf8_char_length(in[curr]);
139   }
140   return result;
141 }
142 
143 string read_mu_text(int address) {
144   if (address == 0) return "";
145   ++address;  // skip refcount
146   int length = get_or_insert(Memory, address);
147   if (length == 0) return "";
148   return read_mu_characters(address+1, length);
149 }
150 
151 string read_mu_characters(int start, int length) {
152   ostringstream tmp;
153   for (int curr = start;  curr < start+length;  ++curr)
154   ¦ tmp << to_unicode(static_cast<uint32_t>(get_or_insert(Memory, curr)));
155   return tmp.str();
156 }
157 
158 //:: some miscellaneous helpers now that we have text
159 
160 //: assert: perform sanity checks at runtime
161 
162 :(scenario assert)
163 % Hide_errors = true;  // '%' lines insert arbitrary C code into tests before calling 'run' with the lines below. Must be immediately after :(scenario) line.
164 def main [
165   assert 0, [this is an assert in Mu]
166 ]
167 +error: this is an assert in Mu
168 
169 :(before "End Primitive Recipe Declarations")
170 ASSERT,
171 :(before "End Primitive Recipe Numbers")
172 put(Recipe_ordinal, "assert", ASSERT);
173 :(before "End Primitive Recipe Checks")
174 case ASSERT: {
175   if (SIZE(inst.ingredients) != 2) {
176   ¦ raise << maybe(get(Recipe, r).name) << "'assert' takes exactly two ingredients rather than '" << to_original_string(inst) << "'\n" << end();
177   ¦ break;
178   }
179   if (!is_mu_scalar(inst.ingredients.at(0))) {
180   ¦ raise << maybe(get(Recipe, r).name) << "'assert' requires a boolean for its first ingredient, but got '" << inst.ingredients.at(0).original_string << "'\n" << end();
181   ¦ break;
182   }
183   if (!is_literal_text(inst.ingredients.at(1)) && !is_mu_text(inst.ingredients.at(1))) {
184   ¦ raise << maybe(get(Recipe, r).name) << "'assert' requires a text as its second ingredient, but got '" << inst.ingredients.at(1).original_string << "'\n" << end();
185   ¦ break;
186   }
187   break;
188 }
189 :(before "End Primitive Recipe Implementations")
190 case ASSERT: {
191   if (!ingredients.at(0).at(0)) {
192   ¦ if (is_literal_text(current_instruction().ingredients.at(1)))
193   ¦ ¦ raise << current_instruction().ingredients.at(1).name << '\n' << end();
194   ¦ else
195   ¦ ¦ raise << read_mu_text(ingredients.at(1).at(0)) << '\n' << end();
196   ¦ if (!Hide_errors) exit(1);
197   }
198   break;
199 }
200 
201 //: 'cheating' by using the host system
202 
203 :(before "End Primitive Recipe Declarations")
204 _READ,
205 :(before "End Primitive Recipe Numbers")
206 put(Recipe_ordinal, "$read", _READ);
207 :(before "End Primitive Recipe Checks")
208 case _READ: {
209   break;
210 }
211 :(before "End Primitive Recipe Implementations")
212 case _READ: {
213   skip_whitespace(cin);
214   string result;
215   if (has_data(cin))
216   ¦ cin >> result;
217   products.resize(1);
218   products.at(0).push_back(new_mu_text(result));
219   break;
220 }
221 
222 :(code)
223 void skip_whitespace(istream& in) {
224   while (true) {
225   ¦ if (!has_data(in)) break;
226   ¦ if (isspace(in.peek())) in.get();
227   ¦ else break;
228   }
229 }