diff options
author | Kartik Agaram <vc@akkartik.com> | 2020-01-01 17:04:37 -0800 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2020-01-01 17:04:37 -0800 |
commit | 2a4088119cf41175457414dfa59bd4064b8f0562 (patch) | |
tree | 64fe184e399f9870ebd481a90eec34d51e5dff68 /archive/2.vm/069hash.cc | |
parent | 23fd294d85959c6b476bcdc35ed6ad508cc99b8f (diff) | |
download | mu-2a4088119cf41175457414dfa59bd4064b8f0562.tar.gz |
5852
Diffstat (limited to 'archive/2.vm/069hash.cc')
-rw-r--r-- | archive/2.vm/069hash.cc | 422 |
1 files changed, 0 insertions, 422 deletions
diff --git a/archive/2.vm/069hash.cc b/archive/2.vm/069hash.cc deleted file mode 100644 index 67897d00..00000000 --- a/archive/2.vm/069hash.cc +++ /dev/null @@ -1,422 +0,0 @@ -// Compute a hash for objects of any type. -// -// The way it's currently implemented, two objects will have the same hash if -// all their non-address fields (all the way down) expand to the same sequence -// of scalar values. In particular, a container with all zero addresses hashes -// to 0. Hopefully this won't be an issue because we are usually hashing -// objects of a single type in any given hash table. -// -// Based on http://burtleburtle.net/bob/hash/hashfaq.html - -:(before "End Primitive Recipe Declarations") -HASH, -:(before "End Primitive Recipe Numbers") -put(Recipe_ordinal, "hash", HASH); -:(before "End Primitive Recipe Checks") -case HASH: { - if (SIZE(inst.ingredients) != 1) { - raise << maybe(get(Recipe, r).name) << "'hash' takes exactly one ingredient rather than '" << to_original_string(inst) << "'\n" << end(); - break; - } - break; -} -:(before "End Primitive Recipe Implementations") -case HASH: { - const reagent& input = current_instruction().ingredients.at(0); - products.resize(1); - products.at(0).push_back(hash(0, input)); - break; -} - -//: in all the code below, the intermediate results of hashing are threaded through 'h' - -:(code) -size_t hash(size_t h, reagent/*copy*/ r) { - canonize(r); - if (is_mu_text(r)) // optimization - return hash_mu_text(h, r); - else if (is_mu_address(r)) - return hash_mu_address(h, r); - else if (is_mu_scalar(r)) - return hash_mu_scalar(h, r); - else if (is_mu_array(r)) - return hash_mu_array(h, r); - else if (is_mu_container(r)) - return hash_mu_container(h, r); - else if (is_mu_exclusive_container(r)) - return hash_mu_exclusive_container(h, r); - assert(false); -} - -size_t hash_mu_scalar(size_t h, const reagent& r) { - double input = is_literal(r) ? r.value : get_or_insert(Memory, r.value); - return hash_iter(h, static_cast<size_t>(input)); -} - -size_t hash_mu_address(size_t h, reagent& r) { - if (r.value == 0) return 0; - trace(Callstack_depth+1, "mem") << "location " << r.value << " is " << no_scientific(get_or_insert(Memory, r.value)) << end(); - r.set_value(get_or_insert(Memory, r.value)); - drop_from_type(r, "address"); - return hash(h, r); -} - -size_t hash_mu_text(size_t h, const reagent& r) { - string input = read_mu_text(get_or_insert(Memory, r.value+/*skip alloc id*/1)); - for (int i = 0; i < SIZE(input); ++i) { - h = hash_iter(h, static_cast<size_t>(input.at(i))); -//? cerr << i << ": " << h << '\n'; - } - return h; -} - -size_t hash_mu_array(size_t h, const reagent& r) { - int size = get_or_insert(Memory, r.value); - reagent/*copy*/ elem = r; - delete elem.type; - elem.type = copy_array_element(r.type); - for (int i=0, address = r.value+1; i < size; ++i, address += size_of(elem)) { - reagent/*copy*/ tmp = elem; - tmp.set_value(address); - h = hash(h, tmp); -//? cerr << i << " (" << address << "): " << h << '\n'; - } - return h; -} - -size_t hash_mu_container(size_t h, const reagent& r) { - type_info& info = get(Type, get_base_type(r.type)->value); - int address = r.value; - int offset = 0; - for (int i = 0; i < SIZE(info.elements); ++i) { - reagent/*copy*/ element = element_type(r.type, i); - if (has_property(element, "ignore-for-hash")) continue; - element.set_value(address+offset); - h = hash(h, element); -//? cerr << i << ": " << h << '\n'; - offset += size_of(info.elements.at(i).type); - } - return h; -} - -size_t hash_mu_exclusive_container(size_t h, const reagent& r) { - const type_tree* type = get_base_type(r.type); - assert(type->value); - int tag = get(Memory, r.value); - reagent/*copy*/ variant = variant_type(r, tag); - // todo: move this error to container definition time - if (has_property(variant, "ignore-for-hash")) - raise << get(Type, type->value).name << ": /ignore-for-hash won't work in exclusive containers\n" << end(); - variant.set_value(r.value + /*skip tag*/1); - h = hash(h, variant); - return h; -} - -size_t hash_iter(size_t h, size_t input) { - h += input; - h += (h<<10); - h ^= (h>>6); - - h += (h<<3); - h ^= (h>>11); - h += (h<<15); - return h; -} - -void test_hash_container_checks_all_elements() { - run( - "container foo [\n" - " x:num\n" - " y:char\n" - "]\n" - "def main [\n" - " 1:foo <- merge 34, 97/a\n" - " 3:num <- hash 1:foo\n" - " return-unless 3:num\n" - " 4:foo <- merge 34, 98/a\n" - " 6:num <- hash 4:foo\n" - " return-unless 6:num\n" - " 7:bool <- equal 3:num, 6:num\n" - "]\n" - ); - // hash on containers includes all elements - CHECK_TRACE_CONTENTS( - "mem: storing 0 in location 7\n" - ); -} - -void test_hash_exclusive_container_checks_all_elements() { - run( - "exclusive-container foo [\n" - " x:bar\n" - " y:num\n" - "]\n" - "container bar [\n" - " a:num\n" - " b:num\n" - "]\n" - "def main [\n" - " 1:foo <- merge 0/x, 34, 35\n" - " 4:num <- hash 1:foo\n" - " return-unless 4:num\n" - " 5:foo <- merge 0/x, 34, 36\n" - " 8:num <- hash 5:foo\n" - " return-unless 8:num\n" - " 9:bool <- equal 4:num, 8:num\n" - "]\n" - ); - // hash on containers includes all elements - CHECK_TRACE_CONTENTS( - "mem: storing 0 in location 9\n" - ); -} - -void test_hash_can_ignore_container_elements() { - run( - "container foo [\n" - " x:num\n" - " y:char/ignore-for-hash\n" - "]\n" - "def main [\n" - " 1:foo <- merge 34, 97/a\n" - " 3:num <- hash 1:foo\n" - " return-unless 3:num\n" - " 4:foo <- merge 34, 98/a\n" - " 6:num <- hash 4:foo\n" - " return-unless 6:num\n" - " 7:bool <- equal 3:num, 6:num\n" - "]\n" - ); - // hashes match even though y is different - CHECK_TRACE_CONTENTS( - "mem: storing 1 in location 7\n" - ); -} - -//: These properties aren't necessary for hash, they just test that the -//: current implementation works like we think it does. - -void test_hash_of_zero_address() { - run( - "def main [\n" - " 1:&:num <- copy null\n" - " 2:num <- hash 1:&:num\n" - "]\n" - ); - CHECK_TRACE_CONTENTS( - "mem: storing 0 in location 2\n" - ); -} - -//: This is probably too aggressive, but we need some way to avoid depending -//: on the precise bit pattern of a floating-point number. -void test_hash_of_numbers_ignores_fractional_part() { - run( - "def main [\n" - " 1:num <- hash 1.5\n" - " 2:num <- hash 1\n" - " 3:bool <- equal 1:num, 2:num\n" - "]\n" - ); - CHECK_TRACE_CONTENTS( - "mem: storing 1 in location 3\n" - ); -} - -void test_hash_of_array_same_as_string() { - run( - "def main [\n" - " 10:num <- copy 3\n" - " 11:num <- copy 97\n" - " 12:num <- copy 98\n" - " 13:num <- copy 99\n" - " 2:num <- hash 10:@:num/unsafe\n" - " return-unless 2:num\n" - " 3:text <- new [abc]\n" - " 4:num <- hash 3:text\n" - " return-unless 4:num\n" - " 5:bool <- equal 2:num, 4:num\n" - "]\n" - ); - CHECK_TRACE_CONTENTS( - "mem: storing 1 in location 5\n" - ); -} - -void test_hash_ignores_address_value() { - run( - "def main [\n" - " 1:&:num <- new number:type\n" - " *1:&:num <- copy 34\n" - " 2:num <- hash 1:&:num\n" - " 3:&:num <- new number:type\n" - " *3:&:num <- copy 34\n" - " 4:num <- hash 3:&:num\n" - " 5:bool <- equal 2:num, 4:num\n" - "]\n" - ); - // different addresses hash to the same result as long as the values the point to do so - CHECK_TRACE_CONTENTS( - "mem: storing 1 in location 5\n" - ); -} - -void test_hash_container_depends_only_on_elements() { - run( - "container foo [\n" - " x:num\n" - " y:char\n" - "]\n" - "container bar [\n" - " x:num\n" - " y:char\n" - "]\n" - "def main [\n" - " 1:foo <- merge 34, 97/a\n" - " 3:num <- hash 1:foo\n" - " return-unless 3:num\n" - " 4:bar <- merge 34, 97/a\n" - " 6:num <- hash 4:bar\n" - " return-unless 6:num\n" - " 7:bool <- equal 3:num, 6:num\n" - "]\n" - ); - // containers with identical elements return identical hashes - CHECK_TRACE_CONTENTS( - "mem: storing 1 in location 7\n" - ); -} - -void test_hash_container_depends_only_on_elements_2() { - run( - "container foo [\n" - " x:num\n" - " y:char\n" - " z:&:num\n" - "]\n" - "def main [\n" - " 1:&:num <- new number:type\n" - " *1:&:num <- copy 34\n" - " 2:foo <- merge 34, 97/a, 1:&:num\n" - " 5:num <- hash 2:foo\n" - " return-unless 5:num\n" - " 6:&:num <- new number:type\n" - " *6:&:num <- copy 34\n" - " 7:foo <- merge 34, 97/a, 6:&:num\n" - " 10:num <- hash 7:foo\n" - " return-unless 10:num\n" - " 11:bool <- equal 5:num, 10:num\n" - "]\n" - ); - // containers with identical 'leaf' elements return identical hashes - CHECK_TRACE_CONTENTS( - "mem: storing 1 in location 11\n" - ); -} - -void test_hash_container_depends_only_on_elements_3() { - run( - "container foo [\n" - " x:num\n" - " y:char\n" - " z:bar\n" - "]\n" - "container bar [\n" - " x:num\n" - " y:num\n" - "]\n" - "def main [\n" - " 1:foo <- merge 34, 97/a, 47, 48\n" - " 6:num <- hash 1:foo\n" - " return-unless 6:num\n" - " 7:foo <- merge 34, 97/a, 47, 48\n" - " 12:num <- hash 7:foo\n" - " return-unless 12:num\n" - " 13:bool <- equal 6:num, 12:num\n" - "]\n" - ); - // containers with identical 'leaf' elements return identical hashes - CHECK_TRACE_CONTENTS( - "mem: storing 1 in location 13\n" - ); -} - -void test_hash_exclusive_container_ignores_tag() { - run( - "exclusive-container foo [\n" - " x:bar\n" - " y:num\n" - "]\n" - "container bar [\n" - " a:num\n" - " b:num\n" - "]\n" - "def main [\n" - " 1:foo <- merge 0/x, 34, 35\n" - " 4:num <- hash 1:foo\n" - " return-unless 4:num\n" - " 5:bar <- merge 34, 35\n" - " 7:num <- hash 5:bar\n" - " return-unless 7:num\n" - " 8:bool <- equal 4:num, 7:num\n" - "]\n" - ); - // hash on containers includes all elements - CHECK_TRACE_CONTENTS( - "mem: storing 1 in location 8\n" - ); -} - -//: An older version that supported only strings. -//: Hash functions are subtle and easy to get wrong, so we keep the old -//: version around and check that the new one is consistent with it. - -void test_hash_matches_old_version() { - run( - "def main [\n" - " 1:text <- new [abc]\n" - " 3:num <- hash 1:text\n" - " 4:num <- hash_old 1:text\n" - " 5:bool <- equal 3:num, 4:num\n" - "]\n" - ); - CHECK_TRACE_CONTENTS( - "mem: storing 1 in location 5\n" - ); -} - -:(before "End Primitive Recipe Declarations") -HASH_OLD, -:(before "End Primitive Recipe Numbers") -put(Recipe_ordinal, "hash_old", HASH_OLD); -:(before "End Primitive Recipe Checks") -case HASH_OLD: { - if (SIZE(inst.ingredients) != 1) { - raise << maybe(get(Recipe, r).name) << "'hash_old' takes exactly one ingredient rather than '" << to_original_string(inst) << "'\n" << end(); - break; - } - if (!is_mu_text(inst.ingredients.at(0))) { - raise << maybe(get(Recipe, r).name) << "'hash_old' currently only supports texts (address array character), but got '" << inst.ingredients.at(0).original_string << "'\n" << end(); - break; - } - break; -} -:(before "End Primitive Recipe Implementations") -case HASH_OLD: { - string input = read_mu_text(ingredients.at(0).at(/*skip alloc id*/1)); - size_t h = 0 ; - - for (int i = 0; i < SIZE(input); ++i) { - h += static_cast<size_t>(input.at(i)); - h += (h<<10); - h ^= (h>>6); - - h += (h<<3); - h ^= (h>>11); - h += (h<<15); - } - - products.resize(1); - products.at(0).push_back(h); - break; -} |