1 // Compute a hash for objects of any type.
  2 //
  3 // The way it's currently implemented, two objects will have the same hash if
  4 // all their non-address fields (all the way down) expand to the same sequence
  5 // of scalar values. In particular, a container with all zero addresses hashes
  6 // to 0. Hopefully this won't be an issue because we are usually hashing
  7 // objects of a single type in any given hash table.
  8 //
  9 // Based on http://burtleburtle.net/bob/hash/hashfaq.html
 10 
 11 :(before "End Primitive Recipe Declarations")
 12 HASH,
 13 :(before "End Primitive Recipe Numbers")
 14 put(Recipe_ordinal, "hash", HASH);
 15 :(before "End Primitive Recipe Checks")
 16 case HASH: {
 17   if (SIZE(inst.ingredients) != 1) {
 18     raise << maybe(get(Recipe, r).name) << "'hash' takes exactly one ingredient rather than '" << to_original_string(inst) << "'\n" << end();
 19     break;
 20   }
 21   break;
 22 }
 23 :(before "End Primitive Recipe Implementations")
 24 case HASH: {
 25   const reagent& input = current_instruction().ingredients.at(0);
 26   products.resize(1);
 27   products.at(0).push_back(hash(0, input));
 28   break;
 29 }
 30 
 31 //: in all the code below, the intermediate results of hashing are threaded through 'h'
 32 
 33 :(code)
 34 size_t hash(size_t h, reagent/*copy*/ r) {
 35   canonize(r);
 36   if (is_mu_text(r))  // optimization
 37     return hash_mu_text(h, r);
 38   else if (is_mu_address(r))
 39     return hash_mu_address(h, r);
 40   else if (is_mu_scalar(r))
 41     return hash_mu_scalar(h, r);
 42   else if (is_mu_array(r))
 43     return hash_mu_array(h, r);
 44   else if (is_mu_container(r))
 45     return hash_mu_container(h, r);
 46   else if (is_mu_exclusive_container(r))
 47     return hash_mu_exclusive_container(h, r);
 48   assert(false);
 49 }
 50 
 51 size_t hash_mu_scalar(size_t h, const reagent& r) {
 52   double input = is_literal(r) ? r.value : get_or_insert(Memory, r.value);
 53   return hash_iter(h, static_cast<size_t>(input));
 54 }
 55 
 56 size_t hash_mu_address(size_t h, reagent& r) {
 57   if (r.value == 0) return 0;
 58   trace("mem") << "location " << r.value << " is " << no_scientific(get_or_insert(Memory, r.value)) << end();
 59   r.set_value(get_or_insert(Memory, r.value));
 60   drop_from_type(r, "address");
 61   return hash(h, r);
 62 }
 63 
 64 size_t hash_mu_text(size_t h, const reagent& r) {
 65   string input = read_mu_text(get_or_insert(Memory, r.value));
 66   for (int i = 0;  i < SIZE(input);  ++i) {
 67     h = hash_iter(h, static_cast<size_t>(input.at(i)));
 68 //?     cerr << i << ": " << h << '\n';
 69   }
 70   return h;
 71 }
 72 
 73 size_t hash_mu_array(size_t h, const reagent& r) {
 74   int size = get_or_insert(Memory, r.value);
 75   reagent/*copy*/ elem = r;
 76   delete elem.type;
 77   elem.type = copy_array_element(r.type);
 78   for (int i=0, address = r.value+1;  i < size;  ++i, address += size_of(elem)) {
 79     reagent/*copy*/ tmp = elem;
 80     tmp.set_value(address);
 81     h = hash(h, tmp);
 82 //?     cerr << i << " (" << address << "): " << h << '\n';
 83   }
 84   return h;
 85 }
 86 
 87 size_t hash_mu_container(size_t h, const reagent& r) {
 88   type_info& info = get(Type, get_base_type(r.type)->value);
 89   int address = r.value;
 90   int offset = 0;
 91   for (int i = 0;  i < SIZE(info.elements);  ++i) {
 92     reagent/*copy*/ element = element_type(r.type, i);
 93     if (has_property(element, "ignore-for-hash")) continue;
 94     element.set_value(address+offset);
 95     h = hash(h, element);
 96 //?     cerr << i << ": " << h << '\n';
 97     offset += size_of(info.elements.at(i).type);
 98   }
 99   return h;
100 }
101 
102 size_t hash_mu_exclusive_container(size_t h, const reagent& r) {
103   const type_tree* type = get_base_type(r.type);
104   assert(type->value);
105   int tag = get(Memory, r.value);
106   reagent/*copy*/ variant = variant_type(r, tag);
107   // todo: move this error to container definition time
108   if (has_property(variant, "ignore-for-hash"))
109     raise << get(Type, type->value).name << ": /ignore-for-hash won't work in exclusive containers\n" << end();
110   variant.set_value(r.value + /*skip tag*/1);
111   h = hash(h, variant);
112   return h;
113 }
114 
115 size_t hash_iter(size_t h, size_t input) {
116   h += input;
117   h += (h<<10);
118   h ^= (h>>6);
119 
120   h += (h<<3);
121   h ^= (h>>11);
122   h += (h<<15);
123   return h;
124 }
125 
126 :(scenario hash_container_checks_all_elements)
127 container foo [
128   x:num
129   y:char
130 ]
131 def main [
132   1:foo <- merge 34, 97/a
133   3:num <- hash 1:foo
134   return-unless 3:num
135   4:foo <- merge 34, 98/a
136   6:num <- hash 4:foo
137   return-unless 6:num
138   7:bool <- equal 3:num, 6:num
139 ]
140 # hash on containers includes all elements
141 +mem: storing 0 in location 7
142 
143 :(scenario hash_exclusive_container_checks_all_elements)
144 exclusive-container foo [
145   x:bar
146   y:num
147 ]
148 container bar [
149   a:num
150   b:num
151 ]
152 def main [
153   1:foo <- merge 0/x, 34, 35
154   4:num <- hash 1:foo
155   return-unless 4:num
156   5:foo <- merge 0/x, 34, 36
157   8:num <- hash 5:foo
158   return-unless 8:num
159   9:bool <- equal 4:num, 8:num
160 ]
161 # hash on containers includes all elements
162 +mem: storing 0 in location 9
163 
164 :(scenario hash_can_ignore_container_elements)
165 container foo [
166   x:num
167   y:char/ignore-for-hash
168 ]
169 def main [
170   1:foo <- merge 34, 97/a
171   3:num <- hash 1:foo
172   return-unless 3:num
173   4:foo <- merge 34, 98/a
174   6:num <- hash 4:foo
175   return-unless 6:num
176   7:bool <- equal 3:num, 6:num
177 ]
178 # hashes match even though y is different
179 +mem: storing 1 in location 7
180 
181 //: These properties aren't necessary for hash, they just test that the
182 //: current implementation works like we think it does.
183 
184 :(scenario hash_of_zero_address)
185 def main [
186   1:&:num <- copy 0
187   2:num <- hash 1:&:num
188 ]
189 +mem: storing 0 in location 2
190 
191 //: This is probably too aggressive, but we need some way to avoid depending
192 //: on the precise bit pattern of a floating-point number.
193 :(scenario hash_of_numbers_ignores_fractional_part)
194 def main [
195   1:num <- hash 1.5
196   2:num <- hash 1
197   3:bool <- equal 1:num, 2:num
198 ]
199 +mem: storing 1 in location 3
200 
201 :(scenario hash_of_array_same_as_string)
202 def main [
203   10:num <- copy 3
204   11:num <- copy 97
205   12:num <- copy 98
206   13:num <- copy 99
207   2:num <- hash 10:@:num/unsafe
208   return-unless 2:num
209   3:text <- new [abc]
210   4:num <- hash 3:text
211   return-unless 4:num
212   5:bool <- equal 2:num, 4:num
213 ]
214 +mem: storing 1 in location 5
215 
216 :(scenario hash_ignores_address_value)
217 def main [
218   1:&:num <- new number:type
219   *1:&:num <- copy 34
220   2:num <- hash 1:&:num
221   3:&:num <- new number:type
222   *3:&:num <- copy 34
223   4:num <- hash 3:&:num
224   5:bool <- equal 2:num, 4:num
225 ]
226 # different addresses hash to the same result as long as the values the point to do so
227 +mem: storing 1 in location 5
228 
229 :(scenario hash_container_depends_only_on_elements)
230 container foo [
231   x:num
232   y:char
233 ]
234 container bar [
235   x:num
236   y:char
237 ]
238 def main [
239   1:foo <- merge 34, 97/a
240   3:num <- hash 1:foo
241   return-unless 3:num
242   4:bar <- merge 34, 97/a
243   6:num <- hash 4:bar
244   return-unless 6:num
245   7:bool <- equal 3:num, 6:num
246 ]
247 # containers with identical elements return identical hashes
248 +mem: storing 1 in location 7
249 
250 :(scenario hash_container_depends_only_on_elements_2)
251 container foo [
252   x:num
253   y:char
254   z:&:num
255 ]
256 def main [
257   1:&:num <- new number:type
258   *1:&:num <- copy 34
259   2:foo <- merge 34, 97/a, 1:&:num
260   5:num <- hash 2:foo
261   return-unless 5:num
262   6:&:num <- new number:type
263   *6:&:num <- copy 34
264   7:foo <- merge 34, 97/a, 6:&:num
265   10:num <- hash 7:foo
266   return-unless 10:num
267   11:bool <- equal 5:num, 10:num
268 ]
269 # containers with identical 'leaf' elements return identical hashes
270 +mem: storing 1 in location 11
271 
272 :(scenario hash_container_depends_only_on_elements_3)
273 container foo [
274   x:num
275   y:char
276   z:bar
277 ]
278 container bar [
279   x:num
280   y:num
281 ]
282 def main [
283   1:foo <- merge 34, 97/a, 47, 48
284   6:num <- hash 1:foo
285   return-unless 6:num
286   7:foo <- merge 34, 97/a, 47, 48
287   12:num <- hash 7:foo
288   return-unless 12:num
289   13:bool <- equal 6:num, 12:num
290 ]
291 # containers with identical 'leaf' elements return identical hashes
292 +mem: storing 1 in location 13
293 
294 :(scenario hash_exclusive_container_ignores_tag)
295 exclusive-container foo [
296   x:bar
297   y:num
298 ]
299 container bar [
300   a:num
301   b:num
302 ]
303 def main [
304   1:foo <- merge 0/x, 34, 35
305   4:num <- hash 1:foo
306   return-unless 4:num
307   5:bar <- merge 34, 35
308   7:num <- hash 5:bar
309   return-unless 7:num
310   8:bool <- equal 4:num, 7:num
311 ]
312 # hash on containers includes all elements
313 +mem: storing 1 in location 8
314 
315 //: An older version that supported only strings.
316 //: Hash functions are subtle and easy to get wrong, so we keep the old
317 //: version around and check that the new one is consistent with it.
318 
319 :(scenario hash_matches_old_version)
320 def main [
321   1:text <- new [abc]
322   2:num <- hash 1:text
323   3:num <- hash_old 1:text
324   4:bool <- equal 2:num, 3:num
325 ]
326 +mem: storing 1 in location 4
327 
328 :(before "End Primitive Recipe Declarations")
329 HASH_OLD,
330 :(before "End Primitive Recipe Numbers")
331 put(Recipe_ordinal, "hash_old", HASH_OLD);
332 :(before "End Primitive Recipe Checks")
333 case HASH_OLD: {
334   if (SIZE(inst.ingredients) != 1) {
335     raise << maybe(get(Recipe, r).name) << "'hash_old' takes exactly one ingredient rather than '" << to_original_string(inst) << "'\n" << end();
336     break;
337   }
338   if (!is_mu_text(inst.ingredients.at(0))) {
339     raise << maybe(get(Recipe, r).name) << "'hash_old' currently only supports texts (address array character), but got '" << inst.ingredients.at(0).original_string << "'\n" << end();
340     break;
341   }
342   break;
343 }
344 :(before "End Primitive Recipe Implementations")
345 case HASH_OLD: {
346   string input = read_mu_text(ingredients.at(0).at(0));
347   size_t h = 0 ;
348 
349   for (int i = 0;  i < SIZE(input);  ++i) {
350     h += static_cast<size_t>(input.at(i));
351     h += (h<<10);
352     h ^= (h>>6);
353 
354     h += (h<<3);
355     h ^= (h>>11);
356     h += (h<<15);
357   }
358 
359   products.resize(1);
360   products.at(0).push_back(h);
361   break;
362 }