# pylint: disable=invalid-name
try:
    # Python 2 only ships the lazy variant under this name.
    from itertools import izip_longest as zip_longest
except ImportError:
    from itertools import zip_longest
# pylint: enable=invalid-name
from hashlib import sha256
from os.path import abspath, isdir


@stack_filter("hash")
class HashFilter(BaseFilter, FileManagerAware):
    """Keep only the entries whose hash equals that of a reference path.

    The reference is the path given to the filter or, when that is empty,
    the currently selected file.  Entries with a different hash are
    filtered out.

    Regular files are represented by the running SHA-256 digest taken after
    every ~64KiB that is read, so two files can be told apart without
    reading either of them to the end.  Directories are represented by
    their own path, which means a directory only ever matches itself.

    The default ``rc.conf`` binds ``.#`` to
    ``console filter_stack add hash%space`` to create this filter.
    """

    def __init__(self, filepath):
        """Hash the reference file once, up front.

        :param filepath: path of the reference file; any falsy value
            selects the file currently under the cursor instead.
        """
        if not filepath:
            selected = self.fm.thisfile
            # There is no selection in e.g. an empty directory.
            filepath = selected.path if selected is not None else None
        self.filepath = filepath

        if not self.filepath:
            self.fm.notify("Error: No file selected for hashing!", bad=True)
            # ``None`` (as opposed to an empty list, which is the legitimate
            # hash of an empty file) marks "no reference": nothing matches.
            self.filehash = None
            return

        # TODO: Lazily generated list would be more efficient, a generator
        #       isn't enough because this object is reused for every fsobject
        #       in the current directory.
        self.filehash = list(self.hash_chunks(abspath(self.filepath)))

    def __call__(self, fobj):
        """Return True if ``fobj`` hashes to the same chunks as the reference.

        :param fobj: file system object providing a ``path`` attribute.
        """
        if self.filehash is None:
            return False
        try:
            # The candidate is hashed lazily, so the first differing chunk
            # ends the comparison.  The '' fill value never equals a digest
            # or a path, hence streams of different length never match.
            pairs = zip_longest(self.filehash,
                                self.hash_chunks(fobj.path),
                                fillvalue='')
            for expected, actual in pairs:
                if expected != actual:
                    return False
        except (IOError, OSError):
            # An entry that cannot be read cannot be shown to be equal.
            return False
        return True

    def __str__(self):
        return "<Filter: hash {}>".format(self.filepath)

    def hash_chunks(self, filepath):
        """Yield the comparison chunks for ``filepath``.

        A directory yields its path as its single chunk.  Any other path is
        opened in binary mode and the running SHA-256 hex digest is yielded
        after each block that is read; an empty file yields nothing.

        :param filepath: path of the file or directory to hash.
        """
        if isdir(filepath):
            # Descending into the directory could never change the outcome
            # of a comparison: two distinct directories already differ in
            # this first chunk.
            yield filepath
            return

        digest = sha256()
        # Read the file in ~64KiB chunks (multiple of sha256's block
        # size that works well enough with HDDs and SSDs)
        blocksize = digest.block_size * 1024
        with open(filepath, 'rb') as stream:
            for block in iter(lambda: stream.read(blocksize), b''):
                digest.update(block)
                yield digest.hexdigest()