summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ranger/ext/utfwidth.py 45
-rw-r--r-- ranger/gui/curses_shortcuts.py 9
-rw-r--r-- test/tc_utfwidth.py 5
3 files changed, 20 insertions, 39 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
index 364db757..762f3894 100644
--- a/ranger/ext/utfwidth.py
+++ b/ranger/ext/utfwidth.py
@@ -28,20 +28,25 @@ WIDE = 2
 
 def uwid(string, count=maxint):
 	"""Return the width of a string"""
-	end = len(string)
-	i = 0
+	try:
+		string = string.decode('utf8', 'replace')
+	except AttributeError:
+		pass
 	width = 0
-	while i < end and count:
-		bytelen = utf_byte_length(string[i:])
-		width += utf_char_width(string[i:i+bytelen])
-		i += bytelen
+	for c in string:
+		width += utf_char_width(c)
 		count -= 1
+		if not count:
+			break
 	return width
 
 def uchars(string):
 	"""Return a list with one string for each character"""
-	end = len(string)
-	i = 0
+	try:
+		string = string.decode('utf-8', 'replace')
+	except AttributeError:
+		pass
+	return list(string)
 	result = []
 	while i < end:
 		bytelen = utf_byte_length(string[i:])
@@ -60,24 +65,9 @@ def uwidslice(string, start=0, end=maxint):
 			chars.append(c)
 	return "".join(chars[start:end])
 
-def utf_byte_length(string):
-	"""Return the byte length of one utf character"""
-	firstord = ord(string[0])
-	if firstord < 0b01111111:
-		return 1
-	if firstord < 0b10111111:
-		return 1  # invalid
-	if firstord < 0b11011111:
-		return 2
-	if firstord < 0b11101111:
-		return 3
-	if firstord < 0b11110100:
-		return 4
-	return 1  # invalid
-
 def utf_char_width(string):
 	"""Return the width of a single character"""
-	u = _utf_char_to_int(string)
+	u = ord(string)
 	if u < 0x1100:
 		return NARROW
 	# Hangul Jamo init. constonants
@@ -117,10 +107,3 @@ def utf_char_width(string):
 	if u >= 0x30000 and u <= 0x3FFFD:
 		return WIDE
 	return NARROW  # invalid (?)
-
-def _utf_char_to_int(string):
-	# Squash the last 6 bits of each byte together to an integer
-	u = 0
-	for c in string:
-		u = (u << 6) | (ord(c) & 0b00111111)
-	return u
diff --git a/ranger/gui/curses_shortcuts.py b/ranger/gui/curses_shortcuts.py
index 3df45700..65886d7e 100644
--- a/ranger/gui/curses_shortcuts.py
+++ b/ranger/gui/curses_shortcuts.py
@@ -51,9 +51,12 @@ class CursesShortcuts(SettingsAware):
 			pass
 		except UnicodeEncodeError:
 			try:
-				self.win.addstr(*(ascii_only(obj) for obj in args))
-			except (_curses.error, TypeError):
-				pass
+				self.win.addstr(*(obj.encode('utf8') for obj in args))
+			except UnicodeEncodeError:
+				try:
+					self.win.addstr(*(ascii_only(obj) for obj in args))
+				except (_curses.error, TypeError):
+					pass
 
 	def addnstr(self, *args):
 		try:
diff --git a/test/tc_utfwidth.py b/test/tc_utfwidth.py
index 67ff609e..fba9f783 100644
--- a/test/tc_utfwidth.py
+++ b/test/tc_utfwidth.py
@@ -29,11 +29,6 @@ a_katakana = "ア"  # width = 2, bytes = 3
 # need one with width = 1 & bytes = 3
 
 class Test(TestCase):
-	def test_utf_byte_length(self):
-		self.assertEqual(1, utf_byte_length(a_ascii))
-		self.assertEqual(2, utf_byte_length(a_umlaut))
-		self.assertEqual(3, utf_byte_length(a_katakana))
-
 	def test_uwid(self):
 		self.assertEqual(1, uwid(a_ascii))
 		self.assertEqual(1, uwid(a_umlaut))
115' href='/akkartik/mu/commit/310copy-bytes.subx?h=main&id=71418907f69cd29ea9a07e7b2f80d51bcec3bba2'>71418907 ^
a148b23a ^








1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157