diff options
author | Kartik K. Agaram <vc@akkartik.com> | 2016-03-14 01:00:48 -0700 |
---|---|---|
committer | Kartik K. Agaram <vc@akkartik.com> | 2016-03-14 01:00:48 -0700 |
commit | 2badd89a58b9666563746d71069abf16f05709ea (patch) | |
tree | 779c9feb243fc8d0f33051cd8323fd23f912f373 /078hash.cc | |
parent | 8b095f802129f8c328a3a4dc3de4443890d34d59 (diff) | |
download | mu-2badd89a58b9666563746d71069abf16f05709ea.tar.gz |
2778 - fix all layers
Diffstat (limited to '078hash.cc')
-rw-r--r-- | 078hash.cc | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/078hash.cc b/078hash.cc new file mode 100644 index 00000000..f8003cbe --- /dev/null +++ b/078hash.cc @@ -0,0 +1,392 @@ +// A universal hash function that can handle objects of any type. +// +// The way it's currently implemented, two objects will have the same hash if +// all their non-address fields (all the way down) expand to the same sequence +// of scalar values. In particular, a container with all zero addresses hashes +// to 0. Hopefully this won't be an issue because we are usually hashing +// objects of a single type in any given hash table. +// +// Based on http://burtleburtle.net/bob/hash/hashfaq.html + +:(before "End Primitive Recipe Declarations") +HASH, +:(before "End Primitive Recipe Numbers") +put(Recipe_ordinal, "hash", HASH); +:(before "End Primitive Recipe Checks") +case HASH: { + if (SIZE(inst.ingredients) != 1) { + raise << maybe(get(Recipe, r).name) << "'hash' takes exactly one ingredient rather than '" << to_string(inst) << "'\n" << end(); + break; + } + break; +} +:(before "End Primitive Recipe Implementations") +case HASH: { + reagent input = current_instruction().ingredients.at(0); // copy + products.resize(1); + products.at(0).push_back(hash(0, input)); + break; +} + +//: in all the code below, the intermediate results of hashing are threaded through 'h' + +:(code) +size_t hash(size_t h, reagent& r) { + canonize(r); + if (is_mu_string(r)) // optimization + return hash_mu_string(h, r); + else if (is_mu_address(r)) + return hash_mu_address(h, r); + else if (is_mu_scalar(r)) + return hash_mu_scalar(h, r); + else if (is_mu_array(r)) + return hash_mu_array(h, r); + else if (is_mu_container(r)) + return hash_mu_container(h, r); + else if (is_mu_exclusive_container(r)) + return hash_mu_exclusive_container(h, r); + assert(false); +} + +size_t hash_mu_scalar(size_t h, const reagent& r) { + double input = is_literal(r) ? r.value : get_or_insert(Memory, r.value); + return hash_iter(h, static_cast<size_t>(input)); +} + +size_t hash_mu_address(size_t h, reagent& r) { + if (r.value == 0) return 0; + r.value = get_or_insert(Memory, r.value); + drop_from_type(r, "address"); + if (r.type->name == "shared") { + ++r.value; + drop_from_type(r, "shared"); + } + return hash(h, r); +} + +size_t hash_mu_string(size_t h, const reagent& r) { + string input = read_mu_string(get_or_insert(Memory, r.value)); + for (int i = 0; i < SIZE(input); ++i) { + h = hash_iter(h, static_cast<size_t>(input.at(i))); +//? cerr << i << ": " << h << '\n'; + } + return h; +} + +size_t hash_mu_array(size_t h, const reagent& r) { + int size = get_or_insert(Memory, r.value); + reagent elem = r; + delete elem.type; + elem.type = new type_tree(*array_element(r.type)); + for (int i=0, address = r.value+1; i < size; ++i, address += size_of(elem)) { + reagent tmp = elem; + tmp.value = address; + h = hash(h, tmp); +//? cerr << i << " (" << address << "): " << h << '\n'; + } + return h; +} + +bool is_mu_container(const reagent& r) { + if (r.type->value == 0) return false; + type_info& info = get(Type, r.type->value); + return info.kind == CONTAINER; +} + +size_t hash_mu_container(size_t h, const reagent& r) { + assert(r.type->value); + type_info& info = get(Type, r.type->value); + int address = r.value; + int offset = 0; + for (int i = 0; i < SIZE(info.elements); ++i) { + reagent element = element_type(r, i); + if (has_property(element, "ignore-for-hash")) continue; + element.set_value(address+offset); + h = hash(h, element); +//? cerr << i << ": " << h << '\n'; + offset += size_of(info.elements.at(i).type); + } + return h; +} + +bool is_mu_exclusive_container(const reagent& r) { + if (r.type->value == 0) return false; + type_info& info = get(Type, r.type->value); + return info.kind == EXCLUSIVE_CONTAINER; +} + +size_t hash_mu_exclusive_container(size_t h, const reagent& r) { + assert(r.type->value); + int tag = get(Memory, r.value); + reagent variant = variant_type(r, tag); + // todo: move this error to container definition time + if (has_property(variant, "ignore-for-hash")) + raise << get(Type, r.type->value).name << ": /ignore-for-hash won't work in exclusive containers\n" << end(); + variant.set_value(r.value + /*skip tag*/1); + h = hash(h, variant); + return h; +} + +size_t hash_iter(size_t h, size_t input) { + h += input; + h += (h<<10); + h ^= (h>>6); + + h += (h<<3); + h ^= (h>>11); + h += (h<<15); + return h; +} + +:(scenario hash_container_checks_all_elements) +container foo [ + x:number + y:character +] +def main [ + 1:foo <- merge 34, 97/a + 3:number <- hash 1:foo + return-unless 3:number + 4:foo <- merge 34, 98/a + 6:number <- hash 4:foo + return-unless 6:number + 7:boolean <- equal 3:number, 6:number +] +# hash on containers includes all elements ++mem: storing 0 in location 7 + +:(scenario hash_exclusive_container_checks_all_elements) +exclusive-container foo [ + x:bar + y:number +] +container bar [ + a:number + b:number +] +def main [ + 1:foo <- merge 0/x, 34, 35 + 4:number <- hash 1:foo + return-unless 4:number + 5:foo <- merge 0/x, 34, 36 + 8:number <- hash 5:foo + return-unless 8:number + 9:boolean <- equal 4:number, 8:number +] +# hash on containers includes all elements ++mem: storing 0 in location 9 + +:(scenario hash_can_ignore_container_elements) +container foo [ + x:number + y:character/ignore-for-hash +] +def main [ + 1:foo <- merge 34, 97/a + 3:number <- hash 1:foo + return-unless 3:number + 4:foo <- merge 34, 98/a + 6:number <- hash 4:foo + return-unless 6:number + 7:boolean <- equal 3:number, 6:number +] +# hashes match even though y is different ++mem: storing 1 in location 7 + +//: These properties aren't necessary for hash, they just test that the +//: current implementation works like we think it does. + +:(scenario hash_of_zero_address) +def main [ + 1:address:number <- copy 0 + 2:number <- hash 1:address:number +] ++mem: storing 0 in location 2 + +//: This is probably too aggressive, but we need some way to avoid depending +//: on the precise bit pattern of a floating-point number. +:(scenario hash_of_numbers_ignores_fractional_part) +def main [ + 1:number <- hash 1.5 + 2:number <- hash 1 + 3:boolean <- equal 1:number, 2:number +] ++mem: storing 1 in location 3 + +:(scenario hash_of_array_same_as_string) +def main [ + 10:number <- copy 3 + 11:number <- copy 97 + 12:number <- copy 98 + 13:number <- copy 99 + 2:number <- hash 10:array:number/unsafe + return-unless 2:number + 3:address:shared:array:character <- new [abc] + 4:number <- hash 3:address:shared:array:character + return-unless 4:number + 5:boolean <- equal 2:number, 4:number +] ++mem: storing 1 in location 5 + +:(scenario hash_ignores_address_value) +def main [ + 1:address:shared:number <- new number:type + *1:address:shared:number <- copy 34 + 2:number <- hash 1:address:shared:number + 3:address:shared:number <- new number:type + *3:address:shared:number <- copy 34 + 4:number <- hash 3:address:shared:number + 5:boolean <- equal 2:number, 4:number +] +# different addresses hash to the same result as long as the values the point to do so ++mem: storing 1 in location 5 + +:(scenario hash_ignores_address_refcount) +def main [ + 1:address:shared:number <- new number:type + *1:address:shared:number <- copy 34 + 2:number <- hash 1:address:shared:number + return-unless 2:number + # increment refcount + 3:address:shared:number <- copy 1:address:shared:number + 4:number <- hash 3:address:shared:number + return-unless 4:number + 5:boolean <- equal 2:number, 4:number +] +# hash doesn't change when refcount changes ++mem: storing 1 in location 5 + +:(scenario hash_container_depends_only_on_elements) +container foo [ + x:number + y:character +] +container bar [ + x:number + y:character +] +def main [ + 1:foo <- merge 34, 97/a + 3:number <- hash 1:foo + return-unless 3:number + 4:bar <- merge 34, 97/a + 6:number <- hash 4:bar + return-unless 6:number + 7:boolean <- equal 3:number, 6:number +] +# containers with identical elements return identical hashes ++mem: storing 1 in location 7 + +:(scenario hash_container_depends_only_on_elements_2) +container foo [ + x:number + y:character + z:address:shared:number +] +def main [ + 1:address:shared:number <- new number:type + *1:address:shared:number <- copy 34 + 2:foo <- merge 34, 97/a, 1:address:shared:number + 5:number <- hash 2:foo + return-unless 5:number + 6:address:shared:number <- new number:type + *6:address:shared:number <- copy 34 + 7:foo <- merge 34, 97/a, 6:address:shared:number + 10:number <- hash 7:foo + return-unless 10:number + 11:boolean <- equal 5:number, 10:number +] +# containers with identical 'leaf' elements return identical hashes ++mem: storing 1 in location 11 + +:(scenario hash_container_depends_only_on_elements_3) +container foo [ + x:number + y:character + z:bar +] +container bar [ + x:number + y:number +] +def main [ + 1:foo <- merge 34, 97/a, 47, 48 + 6:number <- hash 1:foo + return-unless 6:number + 7:foo <- merge 34, 97/a, 47, 48 + 12:number <- hash 7:foo + return-unless 12:number + 13:boolean <- equal 6:number, 12:number +] +# containers with identical 'leaf' elements return identical hashes ++mem: storing 1 in location 13 + +:(scenario hash_exclusive_container_ignores_tag) +exclusive-container foo [ + x:bar + y:number +] +container bar [ + a:number + b:number +] +def main [ + 1:foo <- merge 0/x, 34, 35 + 4:number <- hash 1:foo + return-unless 4:number + 5:bar <- merge 34, 35 + 7:number <- hash 5:bar + return-unless 7:number + 8:boolean <- equal 4:number, 7:number +] +# hash on containers includes all elements ++mem: storing 1 in location 8 + +//: An older version that supported only strings. +//: Hash functions are subtle and easy to get wrong, so we keep the old +//: version around and check that the new one is consistent with it. + +:(scenario hash_matches_old_version) +def main [ + 1:address:shared:array:character <- new [abc] + 2:number <- hash 1:address:shared:array:character + 3:number <- hash_old 1:address:shared:array:character + 4:boolean <- equal 2:number, 3:number +] ++mem: storing 1 in location 4 + +:(before "End Primitive Recipe Declarations") +HASH_OLD, +:(before "End Primitive Recipe Numbers") +put(Recipe_ordinal, "hash_old", HASH_OLD); +:(before "End Primitive Recipe Checks") +case HASH_OLD: { + if (SIZE(inst.ingredients) != 1) { + raise << maybe(get(Recipe, r).name) << "'hash_old' takes exactly one ingredient rather than '" << to_string(inst) << "'\n" << end(); + break; + } + if (!is_mu_string(inst.ingredients.at(0))) { + raise << maybe(get(Recipe, r).name) << "'hash_old' currently only supports strings (address:shared:array:character), but got " << inst.ingredients.at(0).original_string << '\n' << end(); + break; + } + break; +} +:(before "End Primitive Recipe Implementations") +case HASH_OLD: { + string input = read_mu_string(ingredients.at(0).at(0)); + size_t h = 0 ; + + for (int i = 0; i < SIZE(input); ++i) { + h += static_cast<size_t>(input.at(i)); + h += (h<<10); + h ^= (h>>6); + + h += (h<<3); + h ^= (h>>11); + h += (h<<15); + } + + products.resize(1); + products.at(0).push_back(h); + break; +} |