attempt to fix utf issues (wrong calculation of width)

author: hut <hut@lavabit.com> 2010-05-04 23:29:54 +0200
committer: hut <hut@lavabit.com> 2010-05-10 21:04:47 +0200
commit: ccbe8b8d13ebdad09d282da51d118670a566cba5 (patch)
tree: 469773a104a0aeb0a995f5d2acd9f9a91fb635c0 /ranger/ext/utfwidth.py
parent: 338bbba4a14a21c82a3d4849b075fddddad9cee9 (diff)
download: ranger-ccbe8b8d13ebdad09d282da51d118670a566cba5.tar.gz
1 files changed, 64 insertions, 0 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
new file mode 100644
index 00000000..bbc67deb
--- /dev/null
+++ b/ranger/ext/utfwidth.py
@@ -0,0 +1,64 @@
+# -*- encoding: utf8 -*-
+# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
+# Copyright (C) 2004, 2005  Timo Hirvonen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# ----
+# This file contains portions of code from cmus (uchar.c).
+
+# Width values returned by utf_char_width() and summed up by uwid().
+NARROW = 1
+WIDE = 2
+
+def utf_byte_length(string):
+	"""Return the byte length of the UTF-8 character that starts `string`.
+
+	`string` is expected to hold UTF-8 encoded data, either as a byte
+	string or as a string whose items are single bytes.  Only the first
+	item is inspected.  The result never exceeds len(string), so a
+	sequence that is cut off at the end of the data yields the number of
+	items that are actually available.
+
+	Returns 0 for an empty string.  Items that cannot start a character
+	(stray continuation bytes, 0xF5-0xFF) and items above 0xFF (already
+	decoded characters) count as a single item, so that a caller which
+	steps through data using this function always makes progress.
+	"""
+	if not string:
+		return 0
+	first = string[0]
+	# Indexing a Python 3 bytes object already yields an int.
+	if not isinstance(first, int):
+		first = ord(first)
+	if first < 0x80:  # 0xxxxxxx: ASCII
+		return 1
+	if first < 0xC0:  # 10xxxxxx: continuation byte without a lead byte
+		return 1
+	if first < 0xE0:  # 110xxxxx: lead byte of a 2-byte sequence
+		return min(2, len(string))
+	if first < 0xF0:  # 1110xxxx: lead byte of a 3-byte sequence
+		return min(3, len(string))
+	if first < 0xF5:  # 11110xxx, up to 0xF4 (U+10FFFF is the last code point)
+		return min(4, len(string))
+	return 1  # not a valid UTF-8 lead byte
+
+# Inclusive code point ranges that take up two columns.  These are the
+# wide ranges of the classic wcwidth() implementation; U+303F is left out
+# of the CJK block on purpose because it is a narrow character.
+_WIDE_RANGES = (
+	(0x1100, 0x115F),    # Hangul Jamo initial consonants
+	(0x2329, 0x232A),    # angle brackets
+	(0x2E80, 0x303E),    # CJK ... Yi (part below U+303F)
+	(0x3040, 0xA4CF),    # CJK ... Yi (part above U+303F)
+	(0xAC00, 0xD7A3),    # Hangul syllables
+	(0xF900, 0xFAFF),    # CJK compatibility ideographs
+	(0xFE10, 0xFE19),    # vertical forms
+	(0xFE30, 0xFE6F),    # CJK compatibility forms
+	(0xFF00, 0xFF60),    # fullwidth forms
+	(0xFFE0, 0xFFE6),    # fullwidth signs
+	(0x20000, 0x2FFFD),  # supplementary ideographic plane
+	(0x30000, 0x3FFFD),  # tertiary ideographic plane
+)
+
+def utf_char_width(string):
+	"""Return the width (NARROW or WIDE) of one UTF-8 encoded character.
+
+	`string` holds the bytes of exactly one character.  Everything below
+	U+1100 is narrow; above that, only code points inside _WIDE_RANGES
+	are wide, so symbols such as U+20AC or the box drawing characters
+	are measured as one column.
+	"""
+	u = _utf_char_to_int(string)
+	if u < 0x1100:
+		return NARROW
+	for first, last in _WIDE_RANGES:
+		if first <= u <= last:
+			return WIDE
+	return NARROW
+
+def _utf_char_to_int(string):
+	"""Decode the bytes of one UTF-8 character into its code point.
+
+	An empty string gives 0 and a single item is returned as its own
+	ordinal.  For longer input the first item is the lead byte, which
+	carries fewer payload bits the longer the sequence is; each following
+	item contributes its lower six bits.
+	"""
+	# Iterating a Python 3 bytes object already yields ints.
+	values = [c if isinstance(c, int) else ord(c) for c in string]
+	if not values:
+		return 0
+	if len(values) == 1:
+		return values[0]
+	# The lead byte starts with len(values) one-bits and a zero-bit, so
+	# the payload mask is 0x1F, 0x0F or 0x07 for 2, 3 or 4 bytes.
+	u = values[0] & (0xFF >> (len(values) + 1))
+	for value in values[1:]:
+		u = (u << 6) | (value & 0b00111111)
+	return u
+
+def uwid(string):
+	"""Return the total width of a UTF-8 encoded string.
+
+	The string is walked one character at a time: utf_byte_length()
+	tells how many items the next character occupies, and
+	utf_char_width() gives the width of that character.  Whenever
+	utf_byte_length() reports 0, one item is skipped and counted with a
+	width of 1.
+	"""
+	end = len(string)
+	i = 0
+	width = 0
+	while i < end:
+		# utf_byte_length() looks at the first item and never reports
+		# more than 4, so a 4-item window gives the same answer as the
+		# whole tail without copying the rest of the string every time.
+		bytelen = utf_byte_length(string[i:i+4])
+		if bytelen:
+			width += utf_char_width(string[i:i+bytelen])
+		else:
+			width += 1
+			# Always advance, otherwise this loop would never end.
+			bytelen = 1
+		i += bytelen
+	return width
author	hut <hut@lavabit.com>	2010-05-04 23:29:54 +0200
committer	hut <hut@lavabit.com>	2010-05-10 21:04:47 +0200
commit	ccbe8b8d13ebdad09d282da51d118670a566cba5 (patch)
tree	469773a104a0aeb0a995f5d2acd9f9a91fb635c0 /ranger/ext/utfwidth.py
parent	338bbba4a14a21c82a3d4849b075fddddad9cee9 (diff)
download	ranger-ccbe8b8d13ebdad09d282da51d118670a566cba5.tar.gz