https://github.com/akkartik/mu/blob/master/038new_text.cc
  1 //: Extend 'new' to handle a unicode string literal argument or 'text'.
  2 
  3 //: A Mu text is an address to an array of characters.
  4 :(before "End Mu Types Initialization")
  5 put(Type_abbreviations, "text", new_type_tree("&:@:character"));
  6 
  7 :(scenario new_string)
  8 def main [
  9   10:text <- new [abc def]
 10   20:char <- index *10:text, 5
 11 ]
 12 # number code for 'e'
 13 +mem: storing 101 in location 20
 14 
 15 :(scenario new_string_handles_unicode)
 16 def main [
 17   10:text <- new [a«c]
 18   20:num <- length *10:text
 19   21:char <- index *10:text, 1
 20 ]
 21 +mem: storing 3 in location 20
 22 # unicode for '«'
 23 +mem: storing 171 in location 21
 24 
 25 :(before "End NEW Check Special-cases")
 26 if (is_literal_text(inst.ingredients.at(0))) break;
 27 :(before "Convert 'new' To 'allocate'")
 28 if (inst.name == "new" && !inst.ingredients.empty() && is_literal_text(inst.ingredients.at(0))) continue;
 29 :(after "case NEW" following "Primitive Recipe Implementations")
 30   if (is_literal_text(current_instruction().ingredients.at(0))) {
 31     products.resize(1);
 32     products.at(0).push_back(/*alloc id*/0);
 33     products.at(0).push_back(new_mu_text(current_instruction().ingredients.at(0).name));
 34     trace("mem") << "new string alloc: " << products.at(0).at(0) << end();
 35     break;
 36   }
 37 
 38 :(code)
 39 int new_mu_text(const string& contents) {
 40   // allocate an array just large enough for it
 41   int string_length = unicode_length(contents);
 42 //?   Total_alloc += string_length+1;
 43 //?   ++Num_alloc;
 44   int result = allocate(/*array length*/1 + string_length);
 45   int curr_address = result;
 46   ++curr_address;  // skip alloc id
 47   trace("mem") << "storing string length " << string_length << " in location " << curr_address << end();
 48   put(Memory, curr_address, string_length);
 49   ++curr_address;  // skip length
 50   int curr = 0;
 51   const char* raw_contents = contents.c_str();
 52   for (int i = 0;  i < string_length;  ++i) {
 53     uint32_t curr_character;
 54     assert(curr < SIZE(contents));
 55     tb_utf8_char_to_unicode(&curr_character, &raw_contents[curr]);
 56     trace("mem") << "storing string character " << curr_character << " in location " << curr_address << end();
 57     put(Memory, curr_address, curr_character);
 58     curr += tb_utf8_char_length(raw_contents[curr]);
 59     ++curr_address;
 60   }
 61   // Mu strings are not null-terminated in memory.
 62   return result;
 63 }
 64 
 65 //: a new kind of typo
 66 
 67 :(scenario literal_text_without_instruction)
 68 % Hide_errors = true;
 69 def main [
 70   [abc]
 71 ]
 72 +error: main: instruction '[abc]' has no recipe in '[abc]'
 73 
 74 //: stash recognizes texts
 75 
 76 :(scenario stash_text)
 77 def main [
 78   1:text <- new [abc]
 79   stash [foo:], 1:text
 80 ]
 81 +app: foo: abc
 82 
 83 :(before "End inspect Special-cases(r, data)")
 84 if (is_mu_text(r)) {
 85   return read_mu_text(data.at(/*skip alloc id*/1));
 86 }
 87 
 88 :(before "End $print Special-cases")
 89 else if (is_mu_text(current_instruction().ingredients.at(i))) {
 90   cout << read_mu_text(ingredients.at(i).at(/*skip alloc id*/1));
 91 }
 92 
 93 :(scenario unicode_text)
 94 def main [
 95   1:text <- new [♠]
 96   stash [foo:], 1:text
 97 ]
 98 +app: foo: ♠
 99 
100 :(scenario stash_space_after_text)
101 def main [
102   1:text <- new [abc]
103   stash 1:text, [foo]
104 ]
105 +app: abc foo
106 
107 :(scenario stash_text_as_array)
108 def main [
109   1:text <- new [abc]
110   stash *1:text
111 ]
112 +app: 3 97 98 99
113 
114 //: fixes way more than just stash
115 :(before "End Preprocess is_mu_text(reagent x)")
116 if (!canonize_type(x)) return false;
117 
118 //: Allocate more to routine when initializing a literal text
119 :(scenario new_text_overflow)
120 % Initial_memory_per_routine = 3;
121 def main [
122   10:&:num/raw <- new number:type
123   20:text/raw <- new [a]  # not enough room in initial page, if you take the array length into account
124 ]
125 +new: routine allocated memory from 1000 to 1003
126 +new: routine allocated memory from 1003 to 1006
127 
128 //: helpers
129 :(code)
130 int unicode_length(const string& s) {
131   const char* in = s.c_str();
132   int result = 0;
133   int curr = 0;
134   while (curr < SIZE(s)) {  // carefully bounds-check on the string
135     // before accessing its raw pointer
136     ++result;
137     curr += tb_utf8_char_length(in[curr]);
138   }
139   return result;
140 }
141 
142 string read_mu_text(int address) {
143   if (address == 0) return "";
144   int length = get_or_insert(Memory, address+/*alloc id*/1);
145   if (length == 0) return "";
146   return read_mu_characters(address+/*alloc id*/1+/*length*/1, length);
147 }
148 
149 string read_mu_characters(int start, int length) {
150   ostringstream tmp;
151   for (int curr = start;  curr < start+length;  ++curr)
152     tmp << to_unicode(static_cast<uint32_t>(get_or_insert(Memory, curr)));
153   return tmp.str();
154 }
155 
156 //:: some miscellaneous helpers now that we have text
157 
158 //: assert: perform sanity checks at runtime
159 
160 :(scenario assert_literal)
161 % Hide_errors = true;  // '%' lines insert arbitrary C code into tests before calling 'run' with the lines below. Must be immediately after :(scenario) line.
162 def main [
163   assert 0, [this is an assert in Mu]
164 ]
165 +error: this is an assert in Mu
166 
167 :(scenario assert)
168 % Hide_errors = true;  // '%' lines insert arbitrary C code into tests before calling 'run' with the lines below. Must be immediately after :(scenario) line.
169 def main [
170   1:text <- new [this is an assert in Mu]
171   assert 0, 1:text
172 ]
173 +error: this is an assert in Mu
174 
175 :(before "End Primitive Recipe Declarations")
176 ASSERT,
177 :(before "End Primitive Recipe Numbers")
178 put(Recipe_ordinal, "assert", ASSERT);
179 :(before "End Primitive Recipe Checks")
180 case ASSERT: {
181   if (SIZE(inst.ingredients) != 2) {
182     raise << maybe(get(Recipe, r).name) << "'assert' takes exactly two ingredients rather than '" << to_original_string(inst) << "'\n" << end();
183     break;
184   }
185   if (!is_mu_address(inst.ingredients.at(0)) && !is_mu_scalar(inst.ingredients.at(0))) {
186     raise << maybe(get(Recipe, r).name) << "'assert' requires a scalar or address for its first ingredient, but got '" << inst.ingredients.at(0).original_string << "'\n" << end();
187     break;
188   }
189   if (!is_literal_text(inst.ingredients.at(1)) && !is_mu_text(inst.ingredients.at(1))) {
190     raise << maybe(get(Recipe, r).name) << "'assert' requires a text as its second ingredient, but got '" << inst.ingredients.at(1).original_string << "'\n" << end();
191     break;
192   }
193   break;
194 }
195 :(before "End Primitive Recipe Implementations")
196 case ASSERT: {
197   if (!scalar_ingredient(ingredients, 0)) {
198     if (is_literal_text(current_instruction().ingredients.at(1)))
199       raise << current_instruction().ingredients.at(1).name << '\n' << end();
200     else
201       raise << read_mu_text(ingredients.at(1).at(/*skip alloc id*/1)) << '\n' << end();
202     if (!Hide_errors) exit(1);
203   }
204   break;
205 }
206 
207 //: 'cheating' by using the host system
208 
209 :(before "End Primitive Recipe Declarations")
210 _READ,
211 :(before "End Primitive Recipe Numbers")
212 put(Recipe_ordinal, "$read", _READ);
213 :(before "End Primitive Recipe Checks")
214 case _READ: {
215   break;
216 }
217 :(before "End Primitive Recipe Implementations")
218 case _READ: {
219   skip_whitespace(cin);
220   string result;
221   if (has_data(cin))
222     cin >> result;
223   products.resize(1);
224   products.at(0).push_back(new_mu_text(result));
225   break;
226 }
227 
228 :(code)
229 void skip_whitespace(istream& in) {
230   while (true) {
231     if (!has_data(in)) break;
232     if (isspace(in.peek())) in.get();
233     else break;
234   }
235 }