summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorarkedos <arkedos@mailbox.org>2019-12-07 22:02:24 +0100
committertoonn <toonn@toonn.io>2019-12-23 16:36:23 +0100
commit73b98e7d74411a525d82051929ef72dce0e03c1e (patch)
treef0f831cc56516a7ab1cc48dd2f299e6ed88263a6
parent4ff19f6c059fc0d477273304988dc6b7c0ee6320 (diff)
downloadranger-73b98e7d74411a525d82051929ef72dce0e03c1e.tar.gz
Add a filter for unique files by MD5 hash
-rw-r--r--ranger/core/filter_stack.py50
1 file changed, 50 insertions, 0 deletions
diff --git a/ranger/core/filter_stack.py b/ranger/core/filter_stack.py
index 2ca2b1c5..4c00e09b 100644
--- a/ranger/core/filter_stack.py
+++ b/ranger/core/filter_stack.py
@@ -9,6 +9,7 @@ import re
 import mimetypes
 
 from ranger.container.directory import accept_file, InodeFilterConstants
+from ranger.core.shared import FileManagerAware
 
 # pylint: disable=too-few-public-methods
 
@@ -65,6 +66,55 @@ class MimeFilter(BaseFilter):
         return "<Filter: mimetype =~ /{}/>".format(self.pattern)
 
 
+@stack_filter("hash")
+class HashFilter(BaseFilter):
+
+    def __init__(self, *args):
+        self.args = list(*args)
+        self.hasher = None
+        self.hashes = {}
+        self.duplicates = {}
+
+    def __call__(self, fobj):
+        file_paths = [item.basename for item in
+                      FileManagerAware.fm.thisdir.files_all if item.is_file]
+        if not self.args:
+            self.duplicates = self.get_duplicates(file_paths)
+            return fobj.basename not in self.duplicates
+        elif self.args[0].strip() == 'd':
+            self.duplicates = self.get_duplicates(file_paths)
+            return fobj.basename in self.duplicates
+        # return None when an unrecognized argument is passed
+        return None
+
+    def __str__(self):
+        return "<Filter: hash {}>".format(self.args)
+
+    def get_hash(self, file_basename):
+        import hashlib
+        self.hasher = hashlib.md5()
+        data = open(file_basename, 'rb')
+        buff = data.read()
+        self.hasher.update(buff)
+        data.close()
+        return self.hasher.hexdigest()
+
+    def get_duplicates(self, file_paths):
+        for file_base in file_paths:
+            hash_value = self.get_hash(file_base)
+            self.hashes[file_base] = hash_value
+
+            for key, value in self.hashes.items():
+                for file_name, hash_value in self.hashes.items():
+                    # do nothing when comparing a file against itself
+                    if key == file_name:
+                        pass
+                    elif value == hash_value:
+                        self.duplicates[key] = value
+
+        return self.duplicates
+
+
 @stack_filter("type")
 class TypeFilter(BaseFilter):
     type_to_function = {