summary refs log tree commit diff stats
diff options
context:
space:
mode:
author hut <hut@lavabit.com> 2010-05-17 17:28:34 +0200
committer hut <hut@lavabit.com> 2010-05-17 17:29:16 +0200
commit 9e435dcd5bcaf20f74f979f5ac79714172648226 (patch)
tree 9433856d57c90e4495b828a0d3b8a27baf13caf6
parent 30c8fb813f98268d93a091365f051413e4160155 (diff)
download ranger-9e435dcd5bcaf20f74f979f5ac79714172648226.tar.gz
fixed utf stuff
-rw-r--r-- ranger/ext/utfwidth.py 60
-rw-r--r-- ranger/gui/widgets/console.py 9
-rw-r--r-- test/tc_utfwidth.py 6
3 files changed, 58 insertions, 17 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
index 2881a2a0..31440ef0 100644
--- a/ranger/ext/utfwidth.py
+++ b/ranger/ext/utfwidth.py
@@ -24,25 +24,61 @@ WIDE = 2
 def utf_byte_length(string):
 	"""Return the byte length of one utf character"""
 	firstord = ord(string[0])
-	if firstord < 0x01111111:
+	if firstord < 0b01111111:
 		return 1
-	if firstord < 0x10111111:
+	if firstord < 0b10111111:
 		return 1  # invalid
-	if firstord < 0x11011111:
-		return min(2, len(string))
-	if firstord < 0x11101111:
-		return min(3, len(string))
-	if firstord < 0x11110100:
-		return min(4, len(string))
+	if firstord < 0b11011111:
+		return 2
+	if firstord < 0b11101111:
+		return 3
+	if firstord < 0b11110100:
+		return 4
 	return 1  # invalid
 
 def utf_char_width(string):
-	# XXX
+	"""Return the width of a single character"""
+	# Inspired by cmus uchar.c
 	u = _utf_char_to_int(string)
 	if u < 0x1100:
 		return NARROW
-	else:
+	# Hangul Jamo init. constonants
+	if u <= 0x115F:
 		return WIDE
+	# Angle Brackets
+	if u == 0x2329 or u == 0x232A:
+		return WIDE
+	if u < 0x2e80:
+		return NARROW
+	# CJK ... Yi
+	if u < 0x302A:
+		return WIDE
+	if u <= 0x302F:
+		return NARROW
+	if u == 0x303F or u == 0x3099 or u == 0x309a:
+		return NARROW
+	# CJK ... Yi
+	if u <= 0xA4CF:
+		return WIDE
+	# Hangul Syllables
+	if u >= 0xAC00 and u <= 0xD7A3:
+		return WIDE
+	# CJK Compatibility Ideographs
+	if u >= 0xF900 and u <= 0xFAFF:
+		return WIDE
+	# CJK Compatibility Forms
+	if u >= 0xFE30 and u <= 0xFE6F:
+		return WIDE
+	# Fullwidth Forms
+	if u >= 0xFF00 and u <= 0xFF60 or u >= 0xFFE0 and u <= 0xFFE6:
+		return WIDE
+	# CJK Extra Stuff
+	if u >= 0x20000 and u <= 0x2FFFD:
+		return WIDE
+	# ?
+	if u >= 0x30000 and u <= 0x3FFFD:
+		return WIDE
+	return NARROW  # invalid
 
 def _utf_char_to_int(string):
 	# Squash the last 6 bits of each byte together to an integer
@@ -52,16 +88,18 @@ def _utf_char_to_int(string):
 	return u
 
 def uwid(string):
+	"""Return the width of a string"""
 	end = len(string)
 	i = 0
 	width = 0
 	while i < end:
 		bytelen = utf_byte_length(string[i:])
-			width += 1
+		width += utf_char_width(string[i:i+bytelen])
 		i += bytelen
 	return width
 
 def uchars(string):
+	"""Return a list with one string for each character"""
 	end = len(string)
 	i = 0
 	result = []
diff --git a/ranger/gui/widgets/console.py b/ranger/gui/widgets/console.py
index 5a538ce2..51ecf3b2 100644
--- a/ranger/gui/widgets/console.py
+++ b/ranger/gui/widgets/console.py
@@ -223,11 +223,14 @@ class Console(Widget):
 	def move(self, **keywords):
 		direction = Direction(keywords)
 		if direction.horizontal():
-			self.pos = direction.move(
+			uc = uchars(self.line)
+			upos = len(uchars(self.line[:self.pos]))
+			newupos = direction.move(
 					direction=direction.right(),
 					minimum=0,
-					maximum=len(self.line) + 1,
-					current=self.pos)
+					maximum=len(uc) + 1,
+					current=upos)
+			self.pos = len(''.join(uc[:newupos]))
 
 	def delete_rest(self, direction):
 		self.tab_deque = None
diff --git a/test/tc_utfwidth.py b/test/tc_utfwidth.py
index cf564990..d8ffbe1d 100644
--- a/test/tc_utfwidth.py
+++ b/test/tc_utfwidth.py
@@ -26,9 +26,9 @@ a_katakana = "ア"  # width = 2, bytes = 3
 
 class Test(TestCase):
 	def test_utf_byte_length(self):
-		self.assertEqual(1, utf_byte_length(a_ascii[0]))
-		self.assertEqual(2, utf_byte_length(a_umlaut[0]))
-		self.assertEqual(3, utf_byte_length(a_katakana[0]))
+		self.assertEqual(1, utf_byte_length(a_ascii))
+		self.assertEqual(2, utf_byte_length(a_umlaut))
+		self.assertEqual(3, utf_byte_length(a_katakana))
 
 	def test_uwid(self):
 		self.assertEqual(1, uwid(a_ascii))