summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorhut <hut@lavabit.com>2010-05-04 23:29:54 +0200
committerhut <hut@lavabit.com>2010-05-10 21:04:47 +0200
commitccbe8b8d13ebdad09d282da51d118670a566cba5 (patch)
tree469773a104a0aeb0a995f5d2acd9f9a91fb635c0
parent338bbba4a14a21c82a3d4849b075fddddad9cee9 (diff)
downloadranger-ccbe8b8d13ebdad09d282da51d118670a566cba5.tar.gz
attempt to fix utf issues (wrong calculation of width)
-rw-r--r--ranger/ext/utfwidth.py64
-rw-r--r--ranger/gui/bar.py4
-rw-r--r--ranger/gui/widgets/console.py5
-rw-r--r--test/tc_utfwidth.py42
4 files changed, 112 insertions, 3 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
new file mode 100644
index 00000000..bbc67deb
--- /dev/null
+++ b/ranger/ext/utfwidth.py
@@ -0,0 +1,64 @@
+# -*- encoding: utf8 -*-
+# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
+# Copyright (C) 2004, 2005  Timo Hirvonen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# ----
+# This file contains portions of code from cmus (uchar.c).
+
+# Display widths, in terminal columns, returned by utf_char_width().
+NARROW = 1
+WIDE = 2
+
+def _is_bytes(string):
+	"""Return True if string holds raw bytes rather than decoded text."""
+	# On Python 2 `bytes` is an alias of `str`, so plain strings count as
+	# bytes there; on Python 3 `str` is text and is excluded.
+	return isinstance(string, (bytes, bytearray))
+
+def _ordinal(element):
+	"""Return the integer value of one element of a string."""
+	# Python 3 bytes and bytearrays yield ints when indexed; every other
+	# string type yields one-character strings.
+	if isinstance(element, int):
+		return element
+	return ord(element)
+
+def utf_byte_length(string):
+	"""Return the byte length of the utf character at the start of string.
+
+	Only the first byte is inspected to decide how long the sequence
+	should be (continuation bytes are not validated).  The result is
+	capped at len(string), so a sequence cut off by the end of the string
+	is never reported longer than the data that is actually there.
+
+	Returns 0 if string is empty or if its first byte can not start a
+	UTF-8 sequence: a continuation byte, the overlong lead bytes 0xC0 and
+	0xC1, or anything above 0xF4.
+
+	For a text (already decoded) string every element is one complete
+	character, so the result is 1.
+	"""
+	if not string:
+		return 0
+	if not _is_bytes(string):
+		return 1
+	firstord = _ordinal(string[0])
+	if firstord < 0x80:  # 0xxxxxxx: ASCII
+		return 1
+	if firstord < 0xC2:  # 10xxxxxx continuation byte, or overlong C0/C1
+		return 0  # invalid
+	if firstord < 0xE0:  # 110xxxxx
+		return min(2, len(string))
+	if firstord < 0xF0:  # 1110xxxx
+		return min(3, len(string))
+	if firstord < 0xF5:  # 11110xxx, limited to U+10FFFF
+		return min(4, len(string))
+	return 0  # invalid
+
+def utf_char_width(string):
+	"""Return the display width (NARROW or WIDE) of the character in string.
+
+	string is expected to hold exactly one character: its complete UTF-8
+	byte sequence, or a single element of a text string.
+	"""
+	# XXX crude approximation: every code point from U+1100 upwards is
+	# reported as double-width, which is wrong for many symbols.
+	if _utf_char_to_int(string) < 0x1100:
+		return NARROW
+	return WIDE
+
+def _utf_char_to_int(string):
+	"""Return the code point of the single character held in string.
+
+	Returns 0 for an empty string.
+	"""
+	if not string:
+		return 0
+	if not _is_bytes(string):
+		return ord(string[0])
+	values = [_ordinal(c) for c in string]
+	first = values[0]
+	# The lead byte carries a length marker in its high bits which must
+	# not leak into the code point; how many bits are payload depends on
+	# the marker.
+	if first < 0x80:
+		mask = 0x7F  # 0xxxxxxx
+	elif first < 0xE0:
+		mask = 0x1F  # 110xxxxx
+	elif first < 0xF0:
+		mask = 0x0F  # 1110xxxx
+	else:
+		mask = 0x07  # 11110xxx
+	u = first & mask
+	# Every following byte is 10xxxxxx and contributes six payload bits.
+	for value in values[1:]:
+		u = (u << 6) | (value & 0x3F)
+	return u
+
+def uwid(string):
+	"""Return the display width of string in terminal columns.
+
+	Byte strings are read as UTF-8; a byte that can not start a character
+	counts as one column.  Text strings are measured element by element.
+	"""
+	end = len(string)
+	i = 0
+	width = 0
+	while i < end:
+		# A character is at most 4 bytes long, so a 4-byte window is all
+		# utf_byte_length() needs; this avoids copying the whole tail of
+		# the string on every iteration.
+		bytelen = utf_byte_length(string[i:i+4])
+		if bytelen:
+			width += utf_char_width(string[i:i+bytelen])
+		else:
+			width += 1
+			# Step over the invalid byte; advancing by 0 would never
+			# terminate.
+			bytelen = 1
+		i += bytelen
+	return width
diff --git a/ranger/gui/bar.py b/ranger/gui/bar.py
index f5e34eb1..03ed2f78 100644
--- a/ranger/gui/bar.py
+++ b/ranger/gui/bar.py
@@ -13,6 +13,8 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+from ranger.ext.utfwidth import uwid
+
 class Bar(object):
 	left = None
 	right = None
@@ -132,7 +134,7 @@ class ColoredString(object):
 		self.string = self.string[:n]
 
 	def __len__(self):
-		return len(self.string)
+		return uwid(self.string)
 
 	def __str__(self):
 		return self.string
diff --git a/ranger/gui/widgets/console.py b/ranger/gui/widgets/console.py
index fa9e438e..30872639 100644
--- a/ranger/gui/widgets/console.py
+++ b/ranger/gui/widgets/console.py
@@ -27,6 +27,7 @@ from ranger.gui.widgets.console_mode import is_valid_mode, mode_to_class
 from ranger import log, relpath_conf
 from ranger.core.runner import ALLOWED_FLAGS
 from ranger.ext.shell_escape import shell_quote
+from ranger.ext.utfwidth import uwid
 from ranger.container.keymap import CommandArgs
 from ranger.ext.get_executables import get_executables
 from ranger.ext.direction import Direction
@@ -105,8 +106,8 @@ class Console(Widget):
 
 	def finalize(self):
 		try:
-			self.fm.ui.win.move(self.y,
-					self.x + min(self.wid-1, self.pos + len(self.prompt)))
+			xpos = uwid(self.line[0:self.pos]) + len(self.prompt)
+			self.fm.ui.win.move(self.y, self.x + min(self.wid-1, xpos))
 		except:
 			pass
 
diff --git a/test/tc_utfwidth.py b/test/tc_utfwidth.py
new file mode 100644
index 00000000..cf564990
--- /dev/null
+++ b/test/tc_utfwidth.py
@@ -0,0 +1,42 @@
+# -*- encoding: utf8 -*-
+# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+if __name__ == '__main__': from __init__ import init; init()
+
+from unittest import TestCase, main
+from ranger.ext.utfwidth import *
+
+# Sample characters, written as literals in this utf8-encoded source file.
+a_ascii = "a"      # width = 1, bytes = 1
+a_umlaut = "ä"     # width = 1, bytes = 2
+a_katakana = "ア"  # width = 2, bytes = 3
+# TODO: add a sample character with width = 1 & bytes = 3
+
+class Test(TestCase):
+	"""Tests for the byte length and display width helpers of utfwidth."""
+
+	def test_utf_byte_length(self):
+		# Pass the whole character, not only its first byte:
+		# utf_byte_length() caps its result at the length of its argument,
+		# so a one-byte slice could never yield 2 or 3.
+		self.assertEqual(1, utf_byte_length(a_ascii))
+		self.assertEqual(2, utf_byte_length(a_umlaut))
+		self.assertEqual(3, utf_byte_length(a_katakana))
+
+	def test_uwid(self):
+		# Width is counted in terminal columns, not in bytes.
+		self.assertEqual(0, uwid(""))
+		self.assertEqual(1, uwid(a_ascii))
+		self.assertEqual(1, uwid(a_umlaut))
+		self.assertEqual(2, uwid(a_katakana))
+		self.assertEqual(3, uwid(a_katakana + a_umlaut))
+		self.assertEqual(4, uwid("asdf"))
+		self.assertEqual(5, uwid("löööl"))
+		self.assertEqual(6, uwid("バババ"))
+
+if __name__ == '__main__': main()