diff options
author | arkedos <arkedos@mailbox.org> | 2019-12-07 22:02:24 +0100 |
---|---|---|
committer | toonn <toonn@toonn.io> | 2019-12-23 16:36:23 +0100 |
commit | 73b98e7d74411a525d82051929ef72dce0e03c1e (patch) | |
tree | f0f831cc56516a7ab1cc48dd2f299e6ed88263a6 | |
parent | 4ff19f6c059fc0d477273304988dc6b7c0ee6320 (diff) | |
download | ranger-73b98e7d74411a525d82051929ef72dce0e03c1e.tar.gz |
Added a filter for unique files by md5 hash
-rw-r--r-- | ranger/core/filter_stack.py | 50 |
1 files changed, 50 insertions, 0 deletions
@stack_filter("hash")
class HashFilter(BaseFilter):
    """Show or hide files whose content duplicates another file's.

    Candidates are the regular files of the current directory
    (``FileManagerAware.fm.thisdir.files_all``), compared by the MD5 digest
    of their content.  MD5 only serves as a content fingerprint here, it is
    not used for anything security related.

    Without an argument the filter keeps every entry that is *not* a
    duplicate; with the argument ``d`` it keeps only the duplicates.  Any
    other argument makes the filter reject every entry.

    NOTE(review): files are opened by basename, which presumably relies on
    the process working directory being the displayed directory -- confirm
    against the caller.
    """

    # Number of bytes read per iteration while hashing, so that large files
    # are never loaded into memory in one piece.
    CHUNK_SIZE = 1 << 16

    def __init__(self, *args):
        # Unchanged interface: the single positional argument (if any) is
        # expanded into a list of its elements; no argument gives [].
        self.args = list(*args)
        self.hasher = None
        # basename -> hex digest of the files hashed during the last scan.
        self.hashes = {}
        # basename -> hex digest, restricted to digests shared by >= 2 files.
        self.duplicates = {}

    def __call__(self, fobj):
        """Return whether ``fobj`` passes the filter.

        Returns a bool for the two supported modes and ``None`` (falsy) when
        an unsupported argument was given.
        """
        if not self.args:
            keep_duplicates = False
        elif self.args[0].strip() == 'd':
            keep_duplicates = True
        else:
            # Unsupported argument: reject everything without touching the
            # file system.
            return None

        file_paths = [item.basename for item in
                      FileManagerAware.fm.thisdir.files_all if item.is_file]
        self.duplicates = self.get_duplicates(file_paths)
        return (fobj.basename in self.duplicates) == keep_duplicates

    def __str__(self):
        return "<Filter: hash {}>".format(self.args)

    def get_hash(self, file_basename):
        """Return the hexadecimal MD5 digest of the file's content.

        The file is read in ``CHUNK_SIZE`` pieces and is always closed, even
        if reading fails.  Errors from opening or reading propagate to the
        caller.
        """
        import hashlib
        self.hasher = hashlib.md5()
        with open(file_basename, 'rb') as data:
            for chunk in iter(lambda: data.read(self.CHUNK_SIZE), b''):
                self.hasher.update(chunk)
        return self.hasher.hexdigest()

    def get_duplicates(self, file_paths):
        """Return ``{basename: digest}`` for files sharing their content.

        The result is rebuilt from scratch on every call, so files that were
        removed or changed since the previous scan do not linger in it.
        Files that cannot be inspected or read are skipped and therefore
        never reported as duplicates.
        """
        import os
        from collections import Counter, defaultdict

        # Files of different sizes cannot have identical content, so only
        # files that share their size with another file need to be read.
        by_size = defaultdict(list)
        for file_base in file_paths:
            try:
                by_size[os.stat(file_base).st_size].append(file_base)
            except OSError:
                continue

        hashes = {}
        for candidates in by_size.values():
            if len(candidates) < 2:
                continue
            for file_base in candidates:
                try:
                    hashes[file_base] = self.get_hash(file_base)
                except (IOError, OSError):
                    continue

        # A digest that occurs more than once marks all its files as
        # duplicates; counting replaces the pairwise comparison.
        occurrences = Counter(hashes.values())
        self.hashes = hashes
        self.duplicates = {name: digest for name, digest in hashes.items()
                           if occurrences[digest] > 1}
        return self.duplicates