summary refs log tree commit diff stats
path: root/ranger/ext
diff options
context:
space:
mode:
authorhut <hut@lavabit.com>2010-05-17 20:12:35 +0200
committerhut <hut@lavabit.com>2010-05-17 20:12:35 +0200
commitc184e88b5caa66e1cda90019e6e74e0036a24959 (patch)
tree58cdbaee5a54f9a039396faf9789d242afd47ec8 /ranger/ext
parentce421875249b2ce9c6eacbade9b750e46ae80a20 (diff)
parentd213c6b6eb252df03e4fbbe97e7e71876da8c309 (diff)
downloadranger-c184e88b5caa66e1cda90019e6e74e0036a24959.tar.gz
Merge branch 'utf'
Conflicts:
	ranger/__main__.py
Diffstat (limited to 'ranger/ext')
-rw-r--r--ranger/ext/utfwidth.py109
1 files changed, 109 insertions, 0 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
new file mode 100644
index 00000000..a506c676
--- /dev/null
+++ b/ranger/ext/utfwidth.py
@@ -0,0 +1,109 @@
+# -*- encoding: utf8 -*-
+# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
+# Copyright (C) 2004, 2005  Timo Hirvonen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# ----
+# This file contains portions of code from cmus (uchar.c).
+
+NARROW = 1
+WIDE = 2
+
+def uwid(string):
+	"""Return the width of a string"""
+	end = len(string)
+	i = 0
+	width = 0
+	while i < end:
+		bytelen = utf_byte_length(string[i:])
+		width += utf_char_width(string[i:i+bytelen])
+		i += bytelen
+	return width
+
+def uchars(string):
+	"""Return a list with one string for each character"""
+	end = len(string)
+	i = 0
+	result = []
+	while i < end:
+		bytelen = utf_byte_length(string[i:])
+		result.append(string[i:i+bytelen])
+		i += bytelen
+	return result
+
+def utf_byte_length(string):
+	"""Return the byte length of one utf character"""
+	firstord = ord(string[0])
+	if firstord < 0b01111111:
+		return 1
+	if firstord < 0b10111111:
+		return 1  # invalid
+	if firstord < 0b11011111:
+		return 2
+	if firstord < 0b11101111:
+		return 3
+	if firstord < 0b11110100:
+		return 4
+	return 1  # invalid
+
+def utf_char_width(string):
+	"""Return the width of a single character"""
+	u = _utf_char_to_int(string)
+	if u < 0x1100:
+		return NARROW
+	# Hangul Jamo init. constonants
+	if u <= 0x115F:
+		return WIDE
+	# Angle Brackets
+	if u == 0x2329 or u == 0x232A:
+		return WIDE
+	if u < 0x2e80:
+		return NARROW
+	# CJK ... Yi
+	if u < 0x302A:
+		return WIDE
+	if u <= 0x302F:
+		return NARROW
+	if u == 0x303F or u == 0x3099 or u == 0x309a:
+		return NARROW
+	# CJK ... Yi
+	if u <= 0xA4CF:
+		return WIDE
+	# Hangul Syllables
+	if u >= 0xAC00 and u <= 0xD7A3:
+		return WIDE
+	# CJK Compatibility Ideographs
+	if u >= 0xF900 and u <= 0xFAFF:
+		return WIDE
+	# CJK Compatibility Forms
+	if u >= 0xFE30 and u <= 0xFE6F:
+		return WIDE
+	# Fullwidth Forms
+	if u >= 0xFF00 and u <= 0xFF60 or u >= 0xFFE0 and u <= 0xFFE6:
+		return WIDE
+	# CJK Extra Stuff
+	if u >= 0x20000 and u <= 0x2FFFD:
+		return WIDE
+	# ?
+	if u >= 0x30000 and u <= 0x3FFFD:
+		return WIDE
+	return NARROW  # invalid (?)
+
+def _utf_char_to_int(string):
+	# Squash the last 6 bits of each byte together to an integer
+	u = 0
+	for c in string:
+		u = (u << 6) | (ord(c) & 0b00111111)
+	return u