Merge branch 'utf'

Conflicts: ranger/__main__.py
author: hut <hut@lavabit.com> 2010-05-17 20:12:35 +0200
committer: hut <hut@lavabit.com> 2010-05-17 20:12:35 +0200
commit: c184e88b5caa66e1cda90019e6e74e0036a24959 (patch)
tree: 58cdbaee5a54f9a039396faf9789d242afd47ec8
parent: ce421875249b2ce9c6eacbade9b750e46ae80a20 (diff)
parent: d213c6b6eb252df03e4fbbe97e7e71876da8c309 (diff)
download: ranger-c184e88b5caa66e1cda90019e6e74e0036a24959.tar.gz
5 files changed, 180 insertions, 25 deletions
diff --git a/ranger/__main__.py b/ranger/__main__.py
index 916c5a0e..9317d428 100644
--- a/ranger/__main__.py
+++ b/ranger/__main__.py
@@ -147,7 +147,7 @@ def main():
 		print(errormessage)
 		print('ranger requires the python curses module. Aborting.')
 		sys.exit(1)
-	from locale import getdefaultlocale, setlocale, LC_ALL
+	import locale
 	import ranger
 	from ranger.ext import curses_interrupt_handler
 	from ranger.core.runner import Runner
@@ -158,17 +158,8 @@ def main():
 	from ranger.shared import (EnvironmentAware, FileManagerAware,
 			SettingsAware)
 
-	# Ensure that a utf8 locale is set.
-	try:
-		if getdefaultlocale()[1] not in ('utf8', 'UTF-8'):
-			for locale in ('en_US.utf8', 'en_US.UTF-8'):
-				try: setlocale(LC_ALL, locale)
-				except: pass
-				else: break
-			else: setlocale(LC_ALL, '')
-		else: setlocale(LC_ALL, '')
-	except:
-		print("Warning: Unable to set locale.  Expect encoding problems.")
+	try: locale.setlocale(locale.LC_ALL, '')
+	except: print("Warning: Unable to set locale.  Expect encoding problems.")
 
 	arg = parse_arguments()
 	ranger.arg = arg
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
new file mode 100644
index 00000000..a506c676
--- /dev/null
+++ b/ranger/ext/utfwidth.py
@@ -0,0 +1,109 @@
+# -*- encoding: utf8 -*-
+# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
+# Copyright (C) 2004, 2005  Timo Hirvonen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# ----
+# This file contains portions of code from cmus (uchar.c).
+
+NARROW = 1  # width value utf_char_width returns for ordinary characters
+WIDE = 2  # width value utf_char_width returns for double-width characters
+
+def uwid(string):
+	"""Return the display width of a utf-8 string: NARROW or WIDE per character"""
+	end = len(string)
+	i = 0
+	width = 0
+	while i < end:
+		bytelen = utf_byte_length(string[i])  # only the lead byte is inspected; no tail copy
+		width += utf_char_width(string[i:i+bytelen])
+		i += bytelen
+	return width
+
+def uchars(string):
+	"""Return a list with one string per utf-8 character, in original order"""
+	end = len(string)
+	i = 0
+	result = []
+	while i < end:
+		bytelen = utf_byte_length(string[i])  # only the lead byte is inspected; no tail copy
+		result.append(string[i:i+bytelen])
+		i += bytelen
+	return result
+
+def utf_byte_length(string):
+	"""Return the byte length of the utf-8 character whose lead byte is string[0]"""
+	firstord = ord(string[0])
+	if firstord <= 0b01111111:  # 0xxxxxxx: ASCII, including 0x7F
+		return 1
+	if firstord <= 0b10111111:  # 10xxxxxx: continuation byte without a lead byte
+		return 1  # invalid
+	if firstord <= 0b11011111:  # 110xxxxx: lead byte of a 2-byte sequence
+		return 2
+	if firstord <= 0b11101111:  # 1110xxxx: lead byte of a 3-byte sequence
+		return 3
+	if firstord <= 0b11110100:  # 11110xxx, capped at 0xF4 (highest lead byte, U+10FFFF)
+		return 4
+	return 1  # invalid
+
+def utf_char_width(string):
+	"""Return the width (NARROW or WIDE) of a single utf-8 encoded character"""
+	u = _utf_char_to_int(string)  # the ranges below are checked in ascending order
+	if u < 0x1100:
+		return NARROW
+	# Hangul Jamo initial consonants
+	if u <= 0x115F:
+		return WIDE
+	# Angle Brackets
+	if u == 0x2329 or u == 0x232A:
+		return WIDE
+	if u < 0x2e80:
+		return NARROW
+	# CJK ... Yi (first part, up to the narrow exceptions below)
+	if u < 0x302A:
+		return WIDE
+	if u <= 0x302F:
+		return NARROW
+	if u == 0x303F or u == 0x3099 or u == 0x309a:
+		return NARROW
+	# CJK ... Yi (remainder)
+	if u <= 0xA4CF:
+		return WIDE
+	# Hangul Syllables
+	if u >= 0xAC00 and u <= 0xD7A3:
+		return WIDE
+	# CJK Compatibility Ideographs
+	if u >= 0xF900 and u <= 0xFAFF:
+		return WIDE
+	# CJK Compatibility Forms
+	if u >= 0xFE30 and u <= 0xFE6F:
+		return WIDE
+	# Fullwidth Forms
+	if u >= 0xFF00 and u <= 0xFF60 or u >= 0xFFE0 and u <= 0xFFE6:
+		return WIDE
+	# Plane 2 (CJK extensions)
+	if u >= 0x20000 and u <= 0x2FFFD:
+		return WIDE
+	# Plane 3 (presumably reserved for further ideographs; confirm against UAX #11)
+	if u >= 0x30000 and u <= 0x3FFFD:
+		return WIDE
+	return NARROW  # anything not listed above, including invalid input
+
+def _utf_char_to_int(string):
+	# Decode one utf-8 char: payload bits of the lead byte, then 6 bits per later byte
+	u, lead_mask = 0, (0x7F, 0x1F, 0x0F, 0x07)[min(len(string), 4) - 1]
+	for i, c in enumerate(string):
+		u = (u << 6) | (ord(c) & (lead_mask if i == 0 else 0b00111111))
+	return u
diff --git a/ranger/gui/bar.py b/ranger/gui/bar.py
index f5e34eb1..03ed2f78 100644
--- a/ranger/gui/bar.py
+++ b/ranger/gui/bar.py
@@ -13,6 +13,8 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+from ranger.ext.utfwidth import uwid
+
 class Bar(object):
 	left = None
 	right = None
@@ -132,7 +134,7 @@ class ColoredString(object):
 		self.string = self.string[:n]
 
 	def __len__(self):
-		return len(self.string)
+		return uwid(self.string)
 
 	def __str__(self):
 		return self.string
diff --git a/ranger/gui/widgets/console.py b/ranger/gui/widgets/console.py
index fa9e438e..3bb41482 100644
--- a/ranger/gui/widgets/console.py
+++ b/ranger/gui/widgets/console.py
@@ -27,9 +27,11 @@ from ranger.gui.widgets.console_mode import is_valid_mode, mode_to_class
 from ranger import log, relpath_conf
 from ranger.core.runner import ALLOWED_FLAGS
 from ranger.ext.shell_escape import shell_quote
+from ranger.ext.utfwidth import uwid
 from ranger.container.keymap import CommandArgs
 from ranger.ext.get_executables import get_executables
 from ranger.ext.direction import Direction
+from ranger.ext.utfwidth import uwid, uchars
 from ranger.container import History
 from ranger.container.history import HistoryEmptyException
 import ranger
@@ -97,16 +99,17 @@ class Console(Widget):
 
 		self.win.erase()
 		self.addstr(0, 0, self.prompt)
-		overflow = -self.wid + len(self.prompt) + len(self.line) + 1
+		overflow = -self.wid + len(self.prompt) + uwid(self.line) + 1
 		if overflow > 0: 
+			#XXX: cut utf-char-wise, consider width
 			self.addstr(self.line[overflow:])
 		else:
 			self.addstr(self.line)
 
 	def finalize(self):
 		try:
-			self.fm.ui.win.move(self.y,
-					self.x + min(self.wid-1, self.pos + len(self.prompt)))
+			xpos = uwid(self.line[0:self.pos]) + len(self.prompt)
+			self.fm.ui.win.move(self.y, self.x + min(self.wid-1, xpos))
 		except:
 			pass
 
@@ -221,11 +224,15 @@ class Console(Widget):
 	def move(self, **keywords):
 		direction = Direction(keywords)
 		if direction.horizontal():
-			self.pos = direction.move(
+			# Ensure that the pointer is moved utf-char-wise
+			uc = uchars(self.line)
+			upos = len(uchars(self.line[:self.pos]))
+			newupos = direction.move(
 					direction=direction.right(),
 					minimum=0,
-					maximum=len(self.line) + 1,
-					current=self.pos)
+					maximum=len(uc) + 1,
+					current=upos)
+			self.pos = len(''.join(uc[:newupos]))
 
 	def delete_rest(self, direction):
 		self.tab_deque = None
@@ -259,12 +266,16 @@ class Console(Widget):
 
 	def delete(self, mod):
 		self.tab_deque = None
-		if mod == -1 and len(self.line) == 0:
-			self.close()
-		pos = self.pos + mod
-
-		self.line = self.line[0:pos] + self.line[pos+1:]
-		self.move(right=mod)
+		if mod == -1 and self.pos == 0:
+			if not self.line:
+				self.close()
+			return
+		# Delete utf-char-wise
+		uc = uchars(self.line)
+		upos = len(uchars(self.line[:self.pos])) + mod
+		left_part = ''.join(uc[:upos])
+		self.pos = len(left_part)
+		self.line = left_part + ''.join(uc[upos+1:])
 		self.on_line_change()
 
 	def execute(self):
diff --git a/test/tc_utfwidth.py b/test/tc_utfwidth.py
new file mode 100644
index 00000000..d8ffbe1d
--- /dev/null
+++ b/test/tc_utfwidth.py
@@ -0,0 +1,42 @@
+# -*- encoding: utf8 -*-
+# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+if __name__ == '__main__': from __init__ import init; init()
+
+from unittest import TestCase, main
+from ranger.ext.utfwidth import *
+
+a_ascii = "a"      # width = 1, bytes = 1
+a_umlaut = "ä"     # width = 1, bytes = 2
+a_katakana = "ア"  # width = 2, bytes = 3
+# need one with width = 1 & bytes = 3
+
+class Test(TestCase):  # checks ranger.ext.utfwidth against the 1-, 2- and 3-byte fixtures above
+	def test_utf_byte_length(self):  # one fixture per encoded length
+		self.assertEqual(1, utf_byte_length(a_ascii))
+		self.assertEqual(2, utf_byte_length(a_umlaut))
+		self.assertEqual(3, utf_byte_length(a_katakana))
+
+	def test_uwid(self):  # single characters first, then mixed and repeated ones
+		self.assertEqual(1, uwid(a_ascii))
+		self.assertEqual(1, uwid(a_umlaut))
+		self.assertEqual(2, uwid(a_katakana))
+		self.assertEqual(3, uwid(a_katakana + a_umlaut))
+		self.assertEqual(4, uwid("asdf"))
+		self.assertEqual(5, uwid("löööl"))
+		self.assertEqual(6, uwid("バババ"))
+
+if __name__ == '__main__': main()
author	hut <hut@lavabit.com>	2010-05-17 20:12:35 +0200
committer	hut <hut@lavabit.com>	2010-05-17 20:12:35 +0200
commit	c184e88b5caa66e1cda90019e6e74e0036a24959 (patch)
tree	58cdbaee5a54f9a039396faf9789d242afd47ec8
parent	ce421875249b2ce9c6eacbade9b750e46ae80a20 (diff)
parent	d213c6b6eb252df03e4fbbe97e7e71876da8c309 (diff)
download	ranger-c184e88b5caa66e1cda90019e6e74e0036a24959.tar.gz