038new_text.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

//: Extend 'new' to handle a unicode string literal argument or 'text'.

//: A Mu text is an address to an array of characters.
:(before "End Mu Types Initialization")
put(Type_abbreviations, "text", new_type_tree("address:array:character"));

:(scenario new_string)
def main [
  1:text <- new [abc def]
  2:char <- index *1:text, 5
]
# number code for 'e'
+mem: storing 101 in location 2

:(scenario new_string_handles_unicode)
def main [
  1:text <- new [a«c]
  2:num <- length *1:text
  3:char <- index *1:text, 1
]
+mem: storing 3 in location 2
# unicode for '«'
+mem: storing 171 in location 3

:(before "End NEW Check Special-cases")
if (is_literal_text(inst.ingredients.at(0))) break;
:(before "Convert 'new' To 'allocate'")
if (inst.name == "new" && is_literal_text(inst.ingredients.at(0))) continue;
:(after "case NEW" following "Primitive Recipe Implementations")
  if (is_literal_text(current_instruction().ingredients.at(0))) {
    products.resize(1);
    products.at(0).push_back(new_mu_text(current_instruction().ingredients.at(0).name));
    trace(9999, "mem") << "new string alloc: " << products.at(0).at(0) << end();
    break;
  }

:(code)
int new_mu_text(const string& contents) {
  // allocate an array just large enough for it
  int string_length = unicode_length(contents);
//?   Total_alloc += string_length+1;
//?   ++Num_alloc;
  int result = allocate(string_length+/*array length*/1);
  trace(9999, "mem") << "storing string refcount 0 in location " << result << end();
  put(Memory, result, 0);
  int curr_address = result+/*skip refcount*/1;
  trace(9999, "mem") << "storing string length " << string_length << " in location " << curr_address << end();
  put(Memory, curr_address, string_length);
  ++curr_address;  // skip length
  int curr = 0;
  const char* raw_contents = contents.c_str();
  for (int i = 0; i < string_length; ++i) {
    uint32_t curr_character;
    assert(curr < SIZE(contents));
    tb_utf8_char_to_unicode(&curr_character, &raw_contents[curr]);
    trace(9999, "mem") << "storing string character " << curr_character << " in location " << curr_address << end();
    put(Memory, curr_address, curr_character);
    curr += tb_utf8_char_length(raw_contents[curr]);
    ++curr_address;
  }
  // mu strings are not null-terminated in memory
  return result;
}

//: stash recognizes strings

:(scenario stash_string)
def main [
  1:text <- new [abc]
  stash [foo:], 1:text
]
+app: foo: abc

:(before "End print Special-cases(r, data)")
if (is_mu_text(r)) {
  assert(scalar(data));
  return read_mu_text(data.at(0));
}

:(scenario unicode_string)
def main [
  1:text <- new [♠]
  stash [foo:], 1:text
]
+app: foo: ♠

:(scenario stash_space_after_string)
def main [
  1:text <- new [abc]
  stash 1:text, [foo]
]
+app: abc foo

:(scenario stash_string_as_array)
def main [
  1:text <- new [abc]
  stash *1:text
]
+app: 3 97 98 99

//: fixes way more than just stash
:(before "End Preprocess is_mu_text(reagent x)")
if (!canonize_type(x)) return false;

//: Allocate more to routine when initializing a literal string
:(scenario new_string_overflow)
% Initial_memory_per_routine = 3;
def main [
  1:address:num/raw <- new number:type
  2:text/raw <- new [a]  # not enough room in initial page, if you take the refcount and array length into account
]
+new: routine allocated memory from 1000 to 1003
+new: routine allocated memory from 1003 to 1006

//: helpers
:(code)
int unicode_length(const string& s) {
  const char* in = s.c_str();
  int result = 0;
  int curr = 0;
  while (curr < SIZE(s)) {  // carefully bounds-check on the string
    // before accessing its raw pointer
    ++result;
    curr += tb_utf8_char_length(in[curr]);
  }
  return result;
}

string read_mu_text(int address) {
  if (address == 0) return "";
  ++address;  // skip refcount
  int size = get_or_insert(Memory, address);
  if (size == 0) return "";
  ostringstream tmp;
  for (int curr = address+1; curr <= address+size; ++curr) {
    tmp << to_unicode(static_cast<uint32_t>(get_or_insert(Memory, curr)));
  }
  return tmp.str();
}