ext.utfwidth: updated algorithms

author: hut <hut@lavabit.com> 2010-09-30 01:08:40 +0200
committer: hut <hut@lavabit.com> 2010-09-30 01:08:40 +0200
commit: 512f386be8753775ec824a6d9cbaf6527d50eda4 (patch)
tree: 1799f231efa6df1b49602a142b6c3378ff8d6967
parent: d4900452fca51685349966d527d173fdefe83f08 (diff)
download: ranger-512f386be8753775ec824a6d9cbaf6527d50eda4.tar.gz
3 files changed, 20 insertions, 39 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
index 364db757..762f3894 100644
--- a/ranger/ext/utfwidth.py
+++ b/ranger/ext/utfwidth.py
@@ -28,20 +28,25 @@ WIDE = 2
 
 def uwid(string, count=maxint):
 	"""Return the width of a string"""
-	end = len(string)
-	i = 0
+	try:
+		string = string.decode('utf8', 'replace')
+	except AttributeError:
+		pass
 	width = 0
-	while i < end and count:
-		bytelen = utf_byte_length(string[i:])
-		width += utf_char_width(string[i:i+bytelen])
-		i += bytelen
+	for c in string:
+		width += utf_char_width(c)
 		count -= 1
+		if not count:
+			break
 	return width
 
 def uchars(string):
 	"""Return a list with one string for each character"""
-	end = len(string)
-	i = 0
+	try:
+		string = string.decode('utf-8', 'replace')
+	except AttributeError:
+		pass
+	return list(string)
 	result = []
 	while i < end:
 		bytelen = utf_byte_length(string[i:])
@@ -60,24 +65,9 @@ def uwidslice(string, start=0, end=maxint):
 			chars.append(c)
 	return "".join(chars[start:end])
 
-def utf_byte_length(string):
-	"""Return the byte length of one utf character"""
-	firstord = ord(string[0])
-	if firstord < 0b01111111:
-		return 1
-	if firstord < 0b10111111:
-		return 1  # invalid
-	if firstord < 0b11011111:
-		return 2
-	if firstord < 0b11101111:
-		return 3
-	if firstord < 0b11110100:
-		return 4
-	return 1  # invalid
-
 def utf_char_width(string):
 	"""Return the width of a single character"""
-	u = _utf_char_to_int(string)
+	u = ord(string)
 	if u < 0x1100:
 		return NARROW
 	# Hangul Jamo init. constonants
@@ -117,10 +107,3 @@ def utf_char_width(string):
 	if u >= 0x30000 and u <= 0x3FFFD:
 		return WIDE
 	return NARROW  # invalid (?)
-
-def _utf_char_to_int(string):
-	# Squash the last 6 bits of each byte together to an integer
-	u = 0
-	for c in string:
-		u = (u << 6) | (ord(c) & 0b00111111)
-	return u
diff --git a/ranger/gui/curses_shortcuts.py b/ranger/gui/curses_shortcuts.py
index 3df45700..65886d7e 100644
--- a/ranger/gui/curses_shortcuts.py
+++ b/ranger/gui/curses_shortcuts.py
@@ -51,9 +51,12 @@ class CursesShortcuts(SettingsAware):
 			pass
 		except UnicodeEncodeError:
 			try:
-				self.win.addstr(*(ascii_only(obj) for obj in args))
-			except (_curses.error, TypeError):
-				pass
+				self.win.addstr(*(obj.encode('utf8') for obj in args))
+			except UnicodeEncodeError:
+				try:
+					self.win.addstr(*(ascii_only(obj) for obj in args))
+				except (_curses.error, TypeError):
+					pass
 
 	def addnstr(self, *args):
 		try:
diff --git a/test/tc_utfwidth.py b/test/tc_utfwidth.py
index 67ff609e..fba9f783 100644
--- a/test/tc_utfwidth.py
+++ b/test/tc_utfwidth.py
@@ -29,11 +29,6 @@ a_katakana = "ア"  # width = 2, bytes = 3
 # need one with width = 1 & bytes = 3
 
 class Test(TestCase):
-	def test_utf_byte_length(self):
-		self.assertEqual(1, utf_byte_length(a_ascii))
-		self.assertEqual(2, utf_byte_length(a_umlaut))
-		self.assertEqual(3, utf_byte_length(a_katakana))
-
 	def test_uwid(self):
 		self.assertEqual(1, uwid(a_ascii))
 		self.assertEqual(1, uwid(a_umlaut))
author	hut <hut@lavabit.com>	2010-09-30 01:08:40 +0200
committer	hut <hut@lavabit.com>	2010-09-30 01:08:40 +0200
commit	512f386be8753775ec824a6d9cbaf6527d50eda4 (patch)
tree	1799f231efa6df1b49602a142b6c3378ff8d6967
parent	d4900452fca51685349966d527d173fdefe83f08 (diff)
download	ranger-512f386be8753775ec824a6d9cbaf6527d50eda4.tar.gz