about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author toonn <toonn@toonn.io> 2018-10-28 14:24:31 +0100
committer toonn <toonn@toonn.io> 2018-10-28 14:24:31 +0100
commit e56c74bb30ad2f363280e4d1a14bf2c20dff5b5f (patch)
tree b6332429c7c2bba42b67a78e75fe5c26b15a4986
parent e9ef0d9517bb3b29596f4a8105908e1c428e06fd (diff)
parent 89b4b1290cd7ad43ce6452876117323f03616a39 (diff)
download ranger-e56c74bb30ad2f363280e4d1a14bf2c20dff5b5f.tar.gz
Merge branch 'baranovskiy-fix_1350'
-rw-r--r-- ranger/core/actions.py 39
1 files changed, 21 insertions, 18 deletions
diff --git a/ranger/core/actions.py b/ranger/core/actions.py
index 20a180e9..6cf376eb 100644
--- a/ranger/core/actions.py
+++ b/ranger/core/actions.py
@@ -1167,31 +1167,34 @@ class Actions(  # pylint: disable=too-many-instance-attributes,too-many-public-m
     @staticmethod
     def read_text_file(path, count=None):
         """Encoding-aware reading of a text file."""
+        # Guess encoding ourselves.
+        # These should be the most frequently used ones.
+        # latin-1 as the last resort
+        encodings = [('utf-8', 'strict'), ('utf-16', 'strict'),
+                     ('latin-1', 'replace')]
+
+        with open(path, 'rb') as fobj:
+            data = fobj.read(count)
+
         try:
             import chardet
         except ImportError:
-            # Guess encoding ourselves. These should be the most frequently used ones.
-            encodings = ('utf-8', 'utf-16')
-            for encoding in encodings:
-                try:
-                    with codecs.open(path, 'r', encoding=encoding) as fobj:
-                        text = fobj.read(count)
-                except UnicodeDecodeError:
-                    pass
-                else:
-                    LOG.debug("guessed encoding of '%s' as %r", path, encoding)
-                    return text
+            pass
         else:
-            with open(path, 'rb') as fobj:
-                data = fobj.read(count)
             result = chardet.detect(data)
-            LOG.debug("chardet guess for '%s': %s", path, result)
             guessed_encoding = result['encoding']
-            return codecs.decode(data, guessed_encoding, 'replace')
+            if guessed_encoding is not None:
+                # Add chardet's guess before our own.
+                encodings.insert(0, (guessed_encoding, 'replace'))
 
-        # latin-1 as the last resort
-        with codecs.open(path, 'r', encoding='latin-1', errors='replace') as fobj:
-            return fobj.read(count)
+        for (encoding, error_scheme) in encodings:
+            try:
+                text = codecs.decode(data, encoding, error_scheme)
+            except UnicodeDecodeError:
+                pass
+            else:
+                LOG.debug("Guessed encoding of '%s' as %s", path, encoding)
+                return text
 
     # --------------------------
     # -- Tabs