Trying to fix UTF-8 character-width handling...

author: hut <hut@lavabit.com> 2011-10-01 16:07:04 +0200
committer: hut <hut@lavabit.com> 2011-10-01 16:07:04 +0200
commit: f4558377de4e869791f3e0966607b5d8f5d862b1 (patch)
tree: 6985d3b75ef7a21633de546e313f332d0efffdf6
parent: 8a68c275a22ef3a682448b2f4cfcd71c45a063b8 (diff)
download: ranger-f4558377de4e869791f3e0966607b5d8f5d862b1.tar.gz
4 files changed, 36 insertions, 135 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
deleted file mode 100644
index 0976fee1..00000000
--- a/ranger/ext/utfwidth.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# -*- encoding: utf8 -*-
-# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
-# Copyright (C) 2004, 2005  Timo Hirvonen
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-# ----
-# This file contains portions of code from cmus (uchar.c).
-
-NARROW = 1
-WIDE = 2
-
-def uwid(string):
-	"""Return the width of a string"""
-	end = len(string)
-	i = 0
-	width = 0
-	while i < end:
-		bytelen = utf_byte_length(string[i:])
-		width += utf_char_width(string[i:i+bytelen])
-		i += bytelen
-	return width
-
-def uchars(string):
-	"""Return a list with one string for each character"""
-	end = len(string)
-	i = 0
-	result = []
-	while i < end:
-		bytelen = utf_byte_length(string[i:])
-		result.append(string[i:i+bytelen])
-		i += bytelen
-	return result
-
-def utf_byte_length(string):
-	"""Return the byte length of one utf character"""
-	firstord = ord(string[0])
-	if firstord < 0b01111111:
-		return 1
-	if firstord < 0b10111111:
-		return 1  # invalid
-	if firstord < 0b11011111:
-		return 2
-	if firstord < 0b11101111:
-		return 3
-	if firstord < 0b11110100:
-		return 4
-	return 1  # invalid
-
-
-def utf_char_width(string):
-	"""Return the width of a single character"""
-	u = _utf_char_to_int(string)
-	return utf_char_width_(u)
-
-def utf_char_width_(u):
-	if u < 0x1100:
-		return NARROW
-	# Hangul Jamo init. constonants
-	if u <= 0x115F:
-		return WIDE
-	# Angle Brackets
-	if u == 0x2329 or u == 0x232A:
-		return WIDE
-	if u < 0x2e80:
-		return NARROW
-	# CJK ... Yi
-	if u < 0x302A:
-		return WIDE
-	if u <= 0x302F:
-		return NARROW
-	if u == 0x303F or u == 0x3099 or u == 0x309a:
-		return NARROW
-	# CJK ... Yi
-	if u <= 0xA4CF:
-		return WIDE
-	# Hangul Syllables
-	if u >= 0xAC00 and u <= 0xD7A3:
-		return WIDE
-	# CJK Compatibility Ideographs
-	if u >= 0xF900 and u <= 0xFAFF:
-		return WIDE
-	# CJK Compatibility Forms
-	if u >= 0xFE30 and u <= 0xFE6F:
-		return WIDE
-	# Fullwidth Forms
-	if u >= 0xFF00 and u <= 0xFF60 or u >= 0xFFE0 and u <= 0xFFE6:
-		return WIDE
-	# CJK Extra Stuff
-	if u >= 0x20000 and u <= 0x2FFFD:
-		return WIDE
-	# ?
-	if u >= 0x30000 and u <= 0x3FFFD:
-		return WIDE
-	return NARROW  # invalid (?)
-
-def _utf_char_to_int(string):
-	# Squash the last 6 bits of each byte together to an integer
-	u = 0
-	for c in string:
-		u = (u << 6) | (ord(c) & 0b00111111)
-	return u
diff --git a/ranger/ext/widestring.py b/ranger/ext/widestring.py
index c7230806..150a142e 100644
--- a/ranger/ext/widestring.py
+++ b/ranger/ext/widestring.py
@@ -19,11 +19,40 @@
 # This file contains portions of code from cmus (uchar.c).
 
 import sys
+from unicodedata import east_asian_width
 
+PY3 = sys.version > '3'
 ASCIIONLY = set(chr(c) for c in range(1, 128))
 NARROW = 1
 WIDE = 2
 
+def uwid(string):
+	"""Return the width of a string"""
+	if not PY3:
+		string = string.decode('utf-8', 'ignore')
+	return sum(utf_char_width(c) for c in string)
+#	end = len(string)
+#	i = 0
+#	width = 0
+#	while i < end:
+#		bytelen = utf_byte_length(string[i:])
+#		width += utf_char_width(string[i:i+bytelen])
+#		i += bytelen
+#	return width
+
+def uchars(string):
+	if not PY3:
+		string = string.decode('utf-8', 'ignore')
+	return list(string)
+	#end = len(string)
+	#i = 0
+	#result = []
+	#while i < end:
+		#bytelen = utf_byte_length(string[i:])
+		#result.append(string[i:i+bytelen])
+		#i += bytelen
+	#return result
+
 def _utf_char_to_int(string):
 	# Squash the last 6 bits of each byte together to an integer
 	if sys.version > '3':
@@ -76,18 +105,6 @@ def utf_char_width_(u):
 		return WIDE
 	return NARROW  # invalid (?)
 
-def uchars(string):
-	if sys.version >= '3':
-		return list(string)
-	end = len(string)
-	i = 0
-	result = []
-	while i < end:
-		bytelen = utf_byte_length(string[i:])
-		result.append(string[i:i+bytelen])
-		i += bytelen
-	return result
-
 
 def utf_byte_length(string):
 	"""Return the byte length of one utf character"""
@@ -110,8 +127,9 @@ def utf_byte_length(string):
 
 def utf_char_width(string):
 	"""Return the width of a single character"""
-	u = _utf_char_to_int(string)
-	return utf_char_width_(u)
+	if east_asian_width(string)[0] == 'W':
+		return WIDE
+	return NARROW
 
 
 def width(string):
diff --git a/ranger/gui/bar.py b/ranger/gui/bar.py
index aa5c9ab4..ef769ca5 100644
--- a/ranger/gui/bar.py
+++ b/ranger/gui/bar.py
@@ -13,7 +13,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-from ranger.ext.utfwidth import uwid
+from ranger.ext.widestring import uwid
 
 class Bar(object):
 	left = None
diff --git a/ranger/gui/widgets/console.py b/ranger/gui/widgets/console.py
index 924f3557..92983da6 100644
--- a/ranger/gui/widgets/console.py
+++ b/ranger/gui/widgets/console.py
@@ -24,7 +24,7 @@ from collections import deque
 
 from . import Widget
 from ranger.ext.direction import Direction
-from ranger.ext.utfwidth import uwid, uchars, utf_char_width_
+from ranger.ext.widestring import uwid, uchars
 from ranger.container import History
 from ranger.container.history import HistoryEmptyException
 import ranger
@@ -87,12 +87,8 @@ class Console(Widget):
 
 	def finalize(self):
 		try:
-			if self.fm.py3:
-				xpos = sum(utf_char_width_(ord(c)) for c in self.line[0:self.pos]) \
-					+ len(self.prompt)
-			else:
-				xpos = uwid(self.line[0:self.pos]) + len(self.prompt)
-			self.fm.ui.win.move(self.y, self.x + min(self.wid-1, xpos))
+			pos = uwid(self.line[0:self.pos]) + len(self.prompt)
+			self.fm.ui.win.move(self.y, self.x + min(self.wid-1, pos))
 		except:
 			pass
author	hut <hut@lavabit.com>	2011-10-01 16:07:04 +0200
committer	hut <hut@lavabit.com>	2011-10-01 16:07:04 +0200
commit	f4558377de4e869791f3e0966607b5d8f5d862b1 (patch)
tree	6985d3b75ef7a21633de546e313f332d0efffdf6
parent	8a68c275a22ef3a682448b2f4cfcd71c45a063b8 (diff)
download	ranger-f4558377de4e869791f3e0966607b5d8f5d862b1.tar.gz