diff options
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | ranger/core/actions.py | 42 |
2 files changed, 34 insertions, 9 deletions
diff --git a/README.md b/README.md index df17f731..8abb7265 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ Optional, for enhanced file previews (with `scope.sh`): * `transmission-show` for viewing bit-torrent information * `mediainfo` or `exiftool` for viewing information about media files * `odt2txt` for OpenDocument text files (`odt`, `ods`, `odp` and `sxw`) +* `chardet` (Python package) for improved encoding detection of text files Installing diff --git a/ranger/core/actions.py b/ranger/core/actions.py index 3e488159..e790aaa8 100644 --- a/ranger/core/actions.py +++ b/ranger/core/actions.py @@ -409,7 +409,8 @@ class Actions( # pylint: disable=too-many-instance-attributes,too-many-public-m # ranger can act as a file chooser when running with --choosefile=... if mode == 0 and 'label' not in kw: if ranger.args.choosefile: - open(ranger.args.choosefile, 'w').write(self.fm.thisfile.path) + with open(ranger.args.choosefile, 'w') as fobj: + fobj.write(self.fm.thisfile.path) if ranger.args.choosefiles: paths = [] @@ -978,6 +979,7 @@ class Actions( # pylint: disable=too-many-instance-attributes,too-many-public-m if not self.settings.preview_script or not self.settings.use_preview_script: try: + # XXX: properly determine file's encoding return codecs.open(path, 'r', errors='ignore') # IOError for Python2, OSError for Python3 except (IOError, OSError): @@ -1063,14 +1065,7 @@ class Actions( # pylint: disable=too-many-instance-attributes,too-many-public-m data[(-1, -1)] = None data['foundpreview'] = False elif rcode == 2: - fobj = codecs.open(path, 'r', errors='ignore') - try: - data[(-1, -1)] = fobj.read(1024 * 32) - except UnicodeDecodeError: - fobj.close() - fobj = codecs.open(path, 'r', encoding='latin-1', errors='ignore') - data[(-1, -1)] = fobj.read(1024 * 32) - fobj.close() + data[(-1, -1)] = self.read_text_file(path, 1024 * 32) else: data[(-1, -1)] = None @@ -1111,6 +1106,35 @@ class Actions( # pylint: disable=too-many-instance-attributes,too-many-public-m 
@staticmethod
def read_text_file(path, count=None):
    """Encoding-aware reading of a text file.

    The file is read once in binary mode and the bytes are decoded with the
    first candidate encoding that accepts them:

    1. chardet's guess, if the optional ``chardet`` package is importable
       and it produced a guess (decoded with ``errors='replace'``),
    2. UTF-8 (strict),
    3. UTF-16 (strict, a byte order mark is required),
    4. latin-1 as the last resort, which can decode any byte sequence.

    :param path: path of the file to read.
    :param count: maximum number of *bytes* to read; ``None`` (the default)
        reads the whole file.
    :returns: the decoded text.
    :raises IOError, OSError: if the file cannot be opened or read.
    """
    # `read(None)` is not accepted by Python 2 file objects, so branch.
    with open(path, 'rb') as fobj:
        data = fobj.read() if count is None else fobj.read(count)

    # A read capped at `count` bytes may stop in the middle of a multi-byte
    # sequence.  In that case decode "non-finally" so the dangling tail is
    # dropped instead of disqualifying an otherwise correct encoding.
    whole_file = count is None or count < 0 or len(data) < count

    # (encoding, error scheme) pairs, most trusted first.
    # These should be the most frequently used encodings.
    candidates = [('utf-8', 'strict'), ('utf-16', 'strict')]

    try:
        import chardet
    except ImportError:
        pass
    else:
        result = chardet.detect(data)
        LOG.debug("chardet guess for '%s': %s", path, result)
        guessed_encoding = result['encoding']
        # chardet reports None when it has no idea (e.g. for empty input).
        if guessed_encoding is not None:
            candidates.insert(0, (guessed_encoding, 'replace'))

    for encoding, error_scheme in candidates:
        try:
            decoder = codecs.getincrementaldecoder(encoding)(error_scheme)
            text = decoder.decode(data, final=whole_file)
        except LookupError:
            # chardet named an encoding this Python does not provide.
            continue
        except UnicodeError:
            # Not only UnicodeDecodeError: the UTF-16 decoder raises a plain
            # UnicodeError when the data does not start with a BOM.
            continue
        LOG.debug("guessed encoding of '%s' as %r", path, encoding)
        return text

    # latin-1 as the last resort
    return codecs.decode(data, 'latin-1', 'replace')

# --------------------------
# -- Tabs
# --------------------------