Merge branch 'aRkedos-unique-file'

author: toonn <toonn@toonn.io> 2019-12-30 19:50:02 +0100
committer: toonn <toonn@toonn.io> 2019-12-30 19:50:02 +0100
commit: 66ac453ab502033a649c5d0a8836298b257bdc7a (patch)
tree: 67494511223b561c3cd0767a41345d3983048628 /ranger
parent: cd4a3723be57abddadfc8738dd91bc52d829155c (diff)
parent: 81d61afdeb25702ff836723da4e9dc5315a217d1 (diff)
download: ranger-66ac453ab502033a649c5d0a8836298b257bdc7a.tar.gz
4 files changed, 144 insertions, 4 deletions
diff --git a/ranger/config/commands.py b/ranger/config/commands.py
index e66ce849..5defa677 100755
--- a/ranger/config/commands.py
+++ b/ranger/config/commands.py
@@ -1691,7 +1691,7 @@ class filter_stack(Command):
         elif subcommand == "clear":
             self.fm.thisdir.filter_stack = []
         elif subcommand == "rotate":
-            rotate_by = int(self.arg(2) or 1)
+            rotate_by = int(self.arg(2) or self.quantifier or 1)
             self.fm.thisdir.filter_stack = (
                 self.fm.thisdir.filter_stack[-rotate_by:]
                 + self.fm.thisdir.filter_stack[:-rotate_by]
diff --git a/ranger/config/rc.conf b/ranger/config/rc.conf
index 7226130d..66a5fbbc 100644
--- a/ranger/config/rc.conf
+++ b/ranger/config/rc.conf
@@ -596,15 +596,18 @@ map zf    console filter%space
 copymap zf zz
 
 # Filter stack
-map .n console filter_stack add name%space
-map .m console filter_stack add mime%space
 map .d filter_stack add type d
 map .f filter_stack add type f
 map .l filter_stack add type l
+map .m console filter_stack add mime%space
+map .n console filter_stack add name%space
+map .# console filter_stack add hash%space
+map ." filter_stack add duplicate
+map .' filter_stack add unique
 map .| filter_stack add or
 map .& filter_stack add and
 map .! filter_stack add not
-map .r console filter_stack rotate
+map .r filter_stack rotate
 map .c filter_stack clear
 map .* filter_stack decompose
 map .p filter_stack pop
diff --git a/ranger/core/filter_stack.py b/ranger/core/filter_stack.py
index 2ca2b1c5..59495437 100644
--- a/ranger/core/filter_stack.py
+++ b/ranger/core/filter_stack.py
@@ -7,8 +7,17 @@ from __future__ import (absolute_import, division, print_function)
 
 import re
 import mimetypes
+# pylint: disable=invalid-name
+try:
+    from itertools import izip_longest as zip_longest
+except ImportError:
+    from itertools import zip_longest
+# pylint: enable=invalid-name
+from os.path import abspath
 
 from ranger.container.directory import accept_file, InodeFilterConstants
+from ranger.core.shared import FileManagerAware
+from ranger.ext.hash import hash_chunks
 
 # pylint: disable=too-few-public-methods
 
@@ -65,6 +74,104 @@ class MimeFilter(BaseFilter):
         return "<Filter: mimetype =~ /{}/>".format(self.pattern)
 
 
+@stack_filter("hash")
+class HashFilter(BaseFilter, FileManagerAware):
+    """Accept fsobjects whose chunk hashes all equal those of a reference file."""
+
+    def __init__(self, filepath=None):
+        # Without an explicit path, compare against the currently selected file.
+        self.filepath = self.fm.thisfile.path if filepath is None else filepath
+        if self.filepath is None:
+            self.fm.notify("Error: No file selected for hashing!", bad=True)
+            self.filehash = []  # abspath(None) would raise; reject everything
+            return
+        # TODO: Lazily generated list would be more efficient, a generator
+        #       isn't enough because this object is reused for every fsobject
+        #       in the current directory.
+        self.filehash = list(hash_chunks(abspath(self.filepath)))
+
+    def __call__(self, fobj):
+        # Compare chunk by chunk so differing files fail at the first mismatch;
+        # fillvalue='' makes a differing number of chunks compare as unequal.
+        return all(mine == theirs for mine, theirs in zip_longest(
+            self.filehash, hash_chunks(fobj.path), fillvalue=''))
+
+    def __str__(self):
+        return "<Filter: hash {}>".format(self.filepath)
+
+
+def group_by_hash(fsobjects):
+    """Partition fsobjects into groups whose members share every hash chunk.
+
+    Returns a list of non-empty lists of fsobjects; chunks come from hash_chunks.
+    """
+    # Maps the last compared chunk to a list of (fsobject, chunk iterator).
+    buckets = {}
+    for fobj in fsobjects:
+        fobj_chunks = hash_chunks(fobj.path)
+        chunk = next(fobj_chunks)
+        while chunk in buckets:
+            # Move every entry parked on this chunk one chunk further; entries
+            # whose iterator is exhausted stay where they are.
+            for entry in buckets[chunk]:
+                _, entry_chunks = entry
+                try:
+                    buckets[next(entry_chunks)] = [entry]
+                    buckets[chunk].remove(entry)
+                except StopIteration:
+                    pass
+            try:
+                chunk = next(fobj_chunks)
+            except StopIteration:
+                buckets[chunk].append((fobj, fobj_chunks))
+                break
+        else:
+            buckets[chunk] = [(fobj, fobj_chunks)]
+
+    # Drop the iterators and skip buckets emptied while entries were moved on.
+    return [[obj for (obj, _) in bucket] for bucket in buckets.values() if bucket]
+
+
+@stack_filter("duplicate")
+class DuplicateFilter(BaseFilter, FileManagerAware):
+    """Accept fsobjects that share their hash group with at least one other."""
+
+    def __init__(self, _):
+        self.duplicates = self.get_duplicates()
+
+    def __call__(self, fobj):
+        return fobj in self.duplicates
+
+    def __str__(self):
+        return "<Filter: duplicate>"
+
+    def get_duplicates(self):
+        # Only hash groups with two or more members contain duplicates.
+        groups = group_by_hash(self.fm.thisdir.files_all)
+        return set().union(*[dups for dups in groups if len(dups) >= 2])
+
+
+@stack_filter("unique")
+class UniqueFilter(BaseFilter, FileManagerAware):
+    """Accept exactly one fsobject per hash group.
+
+    The representative is the group member with the smallest stat.st_ctime.
+    """
+
+    def __init__(self, _):
+        self.unique = self.get_unique()
+
+    def __call__(self, fobj):
+        return fobj in self.unique
+
+    def __str__(self):
+        return "<Filter: unique>"
+
+    def get_unique(self):
+        groups = group_by_hash(self.fm.thisdir.files_all)
+        return {min(dups, key=lambda fobj: fobj.stat.st_ctime) for dups in groups if dups}
+
+
 @stack_filter("type")
 class TypeFilter(BaseFilter):
     type_to_function = {
diff --git a/ranger/ext/hash.py b/ranger/ext/hash.py
new file mode 100644
index 00000000..d9b2234b
--- /dev/null
+++ b/ranger/ext/hash.py
@@ -0,0 +1,30 @@
+# This file is part of ranger, the console file manager.
+# License: GNU GPL version 3, see the file "AUTHORS" for details.
+
+from __future__ import (absolute_import, division, print_function)
+
+from os import listdir
+from os.path import getsize, isdir, join
+from hashlib import sha256
+
+# pylint: disable=invalid-name
+
+
+def hash_chunks(filepath, h=None):
+    """Yield the running sha256 hexdigest of filepath after each chunk read."""
+    h = sha256() if h is None else h
+    if isdir(filepath):
+        h.update(filepath if isinstance(filepath, bytes)  # hashlib needs bytes
+                 else filepath.encode('utf-8', 'backslashreplace'))
+        yield h.hexdigest()
+        for fp in listdir(filepath):
+            for fp_chunk in hash_chunks(join(filepath, fp), h=h):  # listdir yields bare names
+                yield fp_chunk
+    elif getsize(filepath) == 0:
+        yield h.hexdigest()
+    else:
+        with open(filepath, 'rb') as f:
+            # ~64KiB reads: a multiple of sha256's block size, fine on HDD and SSD
+            for chunk in iter(lambda: f.read(h.block_size * 1024), b''):
+                h.update(chunk)
+                yield h.hexdigest()
author	toonn <toonn@toonn.io>	2019-12-30 19:50:02 +0100
committer	toonn <toonn@toonn.io>	2019-12-30 19:50:02 +0100
commit	66ac453ab502033a649c5d0a8836298b257bdc7a (patch)
tree	67494511223b561c3cd0767a41345d3983048628 /ranger
parent	cd4a3723be57abddadfc8738dd91bc52d829155c (diff)
parent	81d61afdeb25702ff836723da4e9dc5315a217d1 (diff)
download	ranger-66ac453ab502033a649c5d0a8836298b257bdc7a.tar.gz