1 // A universal hash function that can handle objects of any type.
  2 //
  3 // The way it's currently implemented, two objects will have the same hash if
  4 // all their non-address fields (all the way down) expand to the same sequence
  5 // of scalar values. In particular, a container with all zero addresses hashes
  6 // to 0. Hopefully this won't be an issue because we are usually hashing
  7 // objects of a single type in any given hash table.
  8 //
  9 // Based on http://burtleburtle.net/bob/hash/hashfaq.html
 10 
 11 :(before "End Primitive Recipe Declarations")
 12 HASH,
 13 :(before "End Primitive Recipe Numbers")
 14 put(Recipe_ordinal, "hash", HASH);
 15 :(before "End Primitive Recipe Checks")
 16 case HASH: {
 17   if (SIZE(inst.ingredients) != 1) {
 18     raise << maybe(get(Recipe, r).name) << "'hash' takes exactly one ingredient rather than '" << inst.original_string << "'\n" << end();
 19     break;
 20   }
 21   break;
 22 }
 23 :(before "End Primitive Recipe Implementations")
 24 case HASH: {
 25   const reagent& input = current_instruction().ingredients.at(0);
 26   products.resize(1);
 27   products.at(0).push_back(hash(0, input));
 28   break;
 29 }
 30 
 31 //: in all the code below, the intermediate results of hashing are threaded through 'h'
 32 
 33 :(code)
 34 size_t hash(size_t h, reagent/*copy*/ r) {
 35   canonize(r);
 36   if (is_mu_text(r))  // optimization
 37     return hash_mu_text(h, r);
 38   else if (is_mu_address(r))
 39     return hash_mu_address(h, r);
 40   else if (is_mu_scalar(r))
 41     return hash_mu_scalar(h, r);
 42   else if (is_mu_array(r))
 43     return hash_mu_array(h, r);
 44   else if (is_mu_container(r))
 45     return hash_mu_container(h, r);
 46   else if (is_mu_exclusive_container(r))
 47     return hash_mu_exclusive_container(h, r);
 48   assert(false);
 49 }
 50 
 51 size_t hash_mu_scalar(size_t h, const reagent& r) {
 52   double input = is_literal(r) ? r.value : get_or_insert(Memory, r.value);
 53   return hash_iter(h, static_cast<size_t>(input));
 54 }
 55 
 56 size_t hash_mu_address(size_t h, reagent& r) {
 57   if (r.value == 0) return 0;
 58   trace(9999, "mem") << "location " << r.value << " is " << no_scientific(get_or_insert(Memory, r.value)) << end();
 59   r.set_value(get_or_insert(Memory, r.value));
 60   if (r.value != 0) {
 61     trace(9999, "mem") << "skipping refcount at " << r.value << end();
 62     r.set_value(r.value+1);  // skip refcount
 63   }
 64   drop_from_type(r, "address");
 65   return hash(h, r);
 66 }
 67 
 68 size_t hash_mu_text(size_t h, const reagent& r) {
 69   string input = read_mu_text(get_or_insert(Memory, r.value));
 70   for (int i = 0;  i < SIZE(input);  ++i) {
 71     h = hash_iter(h, static_cast<size_t>(input.at(i)));
 72 //?     cerr << i << ": " << h << '\n';
 73   }
 74   return h;
 75 }
 76 
 77 size_t hash_mu_array(size_t h, const reagent& r) {
 78   int size = get_or_insert(Memory, r.value);
 79   reagent/*copy*/ elem = r;
 80   delete elem.type;
 81   elem.type = copy_array_element(r.type);
 82   for (int i=0, address = r.value+1;  i < size;  ++i, address += size_of(elem)) {
 83     reagent/*copy*/ tmp = elem;
 84     tmp.set_value(address);
 85     h = hash(h, tmp);
 86 //?     cerr << i << " (" << address << "): " << h << '\n';
 87   }
 88   return h;
 89 }
 90 
 91 size_t hash_mu_container(size_t h, const reagent& r) {
 92   type_info& info = get(Type, get_base_type(r.type)->value);
 93   int address = r.value;
 94   int offset = 0;
 95   for (int i = 0;  i < SIZE(info.elements);  ++i) {
 96     reagent/*copy*/ element = element_type(r.type, i);
 97     if (has_property(element, "ignore-for-hash")) continue;
 98     element.set_value(address+offset);
 99     h = hash(h, element);
100 //?     cerr << i << ": " << h << '\n';
101     offset += size_of(info.elements.at(i).type);
102   }
103   return h;
104 }
105 
106 size_t hash_mu_exclusive_container(size_t h, const reagent& r) {
107   const type_tree* type = get_base_type(r.type);
108   assert(type->value);
109   int tag = get(Memory, r.value);
110   reagent/*copy*/ variant = variant_type(r, tag);
111   // todo: move this error to container definition time
112   if (has_property(variant, "ignore-for-hash"))
113     raise << get(Type, type->value).name << ": /ignore-for-hash won't work in exclusive containers\n" << end();
114   variant.set_value(r.value + /*skip tag*/1);
115   h = hash(h, variant);
116   return h;
117 }
118 
119 size_t hash_iter(size_t h, size_t input) {
120   h += input;
121   h += (h<<10);
122   h ^= (h>>6);
123 
124   h += (h<<3);
125   h ^= (h>>11);
126   h += (h<<15);
127   return h;
128 }
129 
130 :(scenario hash_container_checks_all_elements)
131 container foo [
132   x:num
133   y:char
134 ]
135 def main [
136   1:foo <- merge 34, 97/a
137   3:num <- hash 1:foo
138   return-unless 3:num
139   4:foo <- merge 34, 98/a
140   6:num <- hash 4:foo
141   return-unless 6:num
142   7:bool <- equal 3:num, 6:num
143 ]
144 # hash on containers includes all elements
145 +mem: storing 0 in location 7
146 
147 :(scenario hash_exclusive_container_checks_all_elements)
148 exclusive-container foo [
149   x:bar
150   y:num
151 ]
152 container bar [
153   a:num
154   b:num
155 ]
156 def main [
157   1:foo <- merge 0/x, 34, 35
158   4:num <- hash 1:foo
159   return-unless 4:num
160   5:foo <- merge 0/x, 34, 36
161   8:num <- hash 5:foo
162   return-unless 8:num
163   9:bool <- equal 4:num, 8:num
164 ]
165 # hash on containers includes all elements
166 +mem: storing 0 in location 9
167 
168 :(scenario hash_can_ignore_container_elements)
169 container foo [
170   x:num
171   y:char/ignore-for-hash
172 ]
173 def main [
174   1:foo <- merge 34, 97/a
175   3:num <- hash 1:foo
176   return-unless 3:num
177   4:foo <- merge 34, 98/a
178   6:num <- hash 4:foo
179   return-unless 6:num
180   7:bool <- equal 3:num, 6:num
181 ]
182 # hashes match even though y is different
183 +mem: storing 1 in location 7
184 
185 //: These properties aren't necessary for hash, they just test that the
186 //: current implementation works like we think it does.
187 
188 :(scenario hash_of_zero_address)
189 def main [
190   1:&:num <- copy 0
191   2:num <- hash 1:&:num
192 ]
193 +mem: storing 0 in location 2
194 
195 //: This is probably too aggressive, but we need some way to avoid depending
196 //: on the precise bit pattern of a floating-point number.
197 :(scenario hash_of_numbers_ignores_fractional_part)
198 def main [
199   1:num <- hash 1.5
200   2:num <- hash 1
201   3:bool <- equal 1:num, 2:num
202 ]
203 +mem: storing 1 in location 3
204 
205 :(scenario hash_of_array_same_as_string)
206 def main [
207   10:num <- copy 3
208   11:num <- copy 97
209   12:num <- copy 98
210   13:num <- copy 99
211   2:num <- hash 10:@:num/unsafe
212   return-unless 2:num
213   3:text <- new [abc]
214   4:num <- hash 3:text
215   return-unless 4:num
216   5:bool <- equal 2:num, 4:num
217 ]
218 +mem: storing 1 in location 5
219 
220 :(scenario hash_ignores_address_value)
221 def main [
222   1:&:num <- new number:type
223   *1:&:num <- copy 34
224   2:num <- hash 1:&:num
225   3:&:num <- new number:type
226   *3:&:num <- copy 34
227   4:num <- hash 3:&:num
228   5:bool <- equal 2:num, 4:num
229 ]
230 # different addresses hash to the same result as long as the values the point to do so
231 +mem: storing 1 in location 5
232 
233 :(scenario hash_ignores_address_refcount)
234 def main [
235   1:&:num <- new number:type
236   *1:&:num <- copy 34
237   2:num <- hash 1:&:num
238   return-unless 2:num
239   # increment refcount
240   3:&:num <- copy 1:&:num
241   4:num <- hash 3:&:num
242   return-unless 4:num
243   5:bool <- equal 2:num, 4:num
244 ]
245 # hash doesn't change when refcount changes
246 +mem: storing 1 in location 5
247 
248 :(scenario hash_container_depends_only_on_elements)
249 container foo [
250   x:num
251   y:char
252 ]
253 container bar [
254   x:num
255   y:char
256 ]
257 def main [
258   1:foo <- merge 34, 97/a
259   3:num <- hash 1:foo
260   return-unless 3:num
261   4:bar <- merge 34, 97/a
262   6:num <- hash 4:bar
263   return-unless 6:num
264   7:bool <- equal 3:num, 6:num
265 ]
266 # containers with identical elements return identical hashes
267 +mem: storing 1 in location 7
268 
269 :(scenario hash_container_depends_only_on_elements_2)
270 container foo [
271   x:num
272   y:char
273   z:&:num
274 ]
275 def main [
276   1:&:num <- new number:type
277   *1:&:num <- copy 34
278   2:foo <- merge 34, 97/a, 1:&:num
279   5:num <- hash 2:foo
280   return-unless 5:num
281   6:&:num <- new number:type
282   *6:&:num <- copy 34
283   7:foo <- merge 34, 97/a, 6:&:num
284   10:num <- hash 7:foo
285   return-unless 10:num
286   11:bool <- equal 5:num, 10:num
287 ]
288 # containers with identical 'leaf' elements return identical hashes
289 +mem: storing 1 in location 11
290 
291 :(scenario hash_container_depends_only_on_elements_3)
292 container foo [
293   x:num
294   y:char
295   z:bar
296 ]
297 container bar [
298   x:num
299   y:num
300 ]
301 def main [
302   1:foo <- merge 34, 97/a, 47, 48
303   6:num <- hash 1:foo
304   return-unless 6:num
305   7:foo <- merge 34, 97/a, 47, 48
306   12:num <- hash 7:foo
307   return-unless 12:num
308   13:bool <- equal 6:num, 12:num
309 ]
310 # containers with identical 'leaf' elements return identical hashes
311 +mem: storing 1 in location 13
312 
313 :(scenario hash_exclusive_container_ignores_tag)
314 exclusive-container foo [
315   x:bar
316   y:num
317 ]
318 container bar [
319   a:num
320   b:num
321 ]
322 def main [
323   1:foo <- merge 0/x, 34, 35
324   4:num <- hash 1:foo
325   return-unless 4:num
326   5:bar <- merge 34, 35
327   7:num <- hash 5:bar
328   return-unless 7:num
329   8:bool <- equal 4:num, 7:num
330 ]
331 # hash on containers includes all elements
332 +mem: storing 1 in location 8
333 
334 //: An older version that supported only strings.
335 //: Hash functions are subtle and easy to get wrong, so we keep the old
336 //: version around and check that the new one is consistent with it.
337 
338 :(scenario hash_matches_old_version)
339 def main [
340   1:text <- new [abc]
341   2:num <- hash 1:text
342   3:num <- hash_old 1:text
343   4:bool <- equal 2:num, 3:num
344 ]
345 +mem: storing 1 in location 4
346 
347 :(before "End Primitive Recipe Declarations")
348 HASH_OLD,
349 :(before "End Primitive Recipe Numbers")
350 put(Recipe_ordinal, "hash_old", HASH_OLD);
351 :(before "End Primitive Recipe Checks")
352 case HASH_OLD: {
353   if (SIZE(inst.ingredients) != 1) {
354     raise << maybe(get(Recipe, r).name) << "'hash_old' takes exactly one ingredient rather than '" << inst.original_string << "'\n" << end();
355     break;
356   }
357   if (!is_mu_text(inst.ingredients.at(0))) {
358     raise << maybe(get(Recipe, r).name) << "'hash_old' currently only supports texts (address array character), but got '" << inst.ingredients.at(0).original_string << "'\n" << end();
359     break;
360   }
361   break;
362 }
363 :(before "End Primitive Recipe Implementations")
364 case HASH_OLD: {
365   string input = read_mu_text(ingredients.at(0).at(0));
366   size_t h = 0 ;
367 
368   for (int i = 0;  i < SIZE(input);  ++i) {
369     h += static_cast<size_t>(input.at(i));
370     h += (h<<10);
371     h ^= (h>>6);
372 
373     h += (h<<3);
374     h ^= (h>>11);
375     h += (h<<15);
376   }
377 
378   products.resize(1);
379   products.at(0).push_back(h);
380   break;
381 }