Refactor encoding detection

author: toonn <toonn@toonn.io> 2018-10-28 13:46:29 +0100
committer: toonn <toonn@toonn.io> 2018-10-28 13:53:26 +0100
commit: 55099758427b04201510774b4526bd2b0d331b48 (patch)
tree: 5359cbf6041d8ba79169fafeeef231ee3e3eb750 /ranger
parent: 9b1af8452642267ae982853ba331bb8fd8f22320 (diff)
download: ranger-55099758427b04201510774b4526bd2b0d331b48.tar.gz
1 file changed, 22 insertions, 19 deletions
diff --git a/ranger/core/actions.py b/ranger/core/actions.py
index 7e5765b0..5358a40f 100644
--- a/ranger/core/actions.py
+++ b/ranger/core/actions.py
@@ -1167,33 +1167,36 @@ class Actions(  # pylint: disable=too-many-instance-attributes,too-many-public-m
     @staticmethod
     def read_text_file(path, count=None):
         """Encoding-aware reading of a text file."""
+        # Guess encoding ourselves.
+        # These should be the most frequently used ones.
+        # latin-1 as the last resort
+        encodings = [ ('utf-8', 'strict')
+                    , ('utf-16', 'strict')
+                    , ('latin-1', 'replace')
+                    ]
+
+        with open(path, 'rb') as fobj:
+            data = fobj.read(count)
+
         try:
             import chardet
         except ImportError:
-            # Guess encoding ourselves.
-            # These should be the most frequently used ones.
-            encodings = ('utf-8', 'utf-16')
-            for encoding in encodings:
-                try:
-                    with codecs.open(path, 'r', encoding=encoding) as fobj:
-                        text = fobj.read(count)
-                except UnicodeDecodeError:
-                    pass
-                else:
-                    LOG.debug("guessed encoding of '%s' as %r", path, encoding)
-                    return text
+            pass
         else:
-            with open(path, 'rb') as fobj:
-                data = fobj.read(count)
             result = chardet.detect(data)
             guessed_encoding = result['encoding']
             if guessed_encoding is not None:
-                LOG.debug("chardet guess for '%s': %s", path, result)
-                return codecs.decode(data, guessed_encoding, 'replace')
+                # Add chardet's guess before our own.
+                encodings.insert(0, (guessed_encoding, 'replace'))
 
-        # latin-1 as the last resort
-        with codecs.open(path, 'r', encoding='latin-1', errors='replace') as fobj:
-            return fobj.read(count)
+        for (encoding, error_scheme) in encodings:
+            try:
+                text = codecs.decode(data, encoding, error_scheme)
+            except UnicodeDecodeError:
+                pass
+            else:
+                LOG.debug("Guessed encoding of '%s' as %s", path, encoding)
+                return text
 
     # --------------------------
     # -- Tabs
author	toonn <toonn@toonn.io>	2018-10-28 13:46:29 +0100
committer	toonn <toonn@toonn.io>	2018-10-28 13:53:26 +0100
commit	55099758427b04201510774b4526bd2b0d331b48 (patch)
tree	5359cbf6041d8ba79169fafeeef231ee3e3eb750 /ranger
parent	9b1af8452642267ae982853ba331bb8fd8f22320 (diff)
download	ranger-55099758427b04201510774b4526bd2b0d331b48.tar.gz