summary refs log tree commit diff stats
diff options
context:
space:
mode:
author hut <hut@lavabit.com> 2010-05-04 23:29:54 +0200
committer hut <hut@lavabit.com> 2010-05-10 21:04:47 +0200
commit ccbe8b8d13ebdad09d282da51d118670a566cba5 (patch)
tree 469773a104a0aeb0a995f5d2acd9f9a91fb635c0
parent 338bbba4a14a21c82a3d4849b075fddddad9cee9 (diff)
download ranger-ccbe8b8d13ebdad09d282da51d118670a566cba5.tar.gz
attempt to fix utf issues (wrong calculation of width)
-rw-r--r-- ranger/ext/utfwidth.py 64
-rw-r--r-- ranger/gui/bar.py 4
-rw-r--r-- ranger/gui/widgets/console.py 5
-rw-r--r-- test/tc_utfwidth.py 42
4 files changed, 112 insertions, 3 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
new file mode 100644
index 00000000..bbc67deb
--- /dev/null
+++ b/ranger/ext/utfwidth.py
@@ -0,0 +1,64 @@
+# -*- encoding: utf8 -*-
+# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
+# Copyright (C) 2004, 2005  Timo Hirvonen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# ----
+# This file contains portions of code from cmus (uchar.c).
+
+NARROW = 1  # width returned for an ordinary character
+WIDE = 2  # width returned for a double-width character
+
+def _utf_bytes(string):
+	"""Return string as a bytearray of UTF-8 bytes.
+
+	Byte strings are used as they are; text is encoded to UTF-8 first,
+	so the byte-level functions below always see integer byte values.
+	"""
+	if isinstance(string, (bytes, bytearray)):
+		return bytearray(string)
+	return bytearray(string.encode('utf-8', 'replace'))
+
+def utf_byte_length(string):
+	"""Return the byte length of the UTF-8 character that starts string.
+
+	The length is read from the lead byte and clamped to the number of
+	bytes that are actually available.  Returns 0 if string is empty or
+	if its first byte cannot start a character (a continuation byte or a
+	value above 0xF4).
+	"""
+	data = _utf_bytes(string)
+	if not data:
+		return 0
+	firstord = data[0]
+	# The limits are bit patterns, so they have to be binary literals,
+	# and each one is the highest value that still belongs to its class.
+	if firstord <= 0b01111111:  # 0xxxxxxx: ASCII
+		return 1
+	if firstord <= 0b10111111:  # 10xxxxxx: continuation byte
+		return 0  # invalid
+	if firstord <= 0b11011111:  # 110xxxxx
+		return min(2, len(data))
+	if firstord <= 0b11101111:  # 1110xxxx
+		return min(3, len(data))
+	if firstord <= 0b11110100:  # 11110xxx, up to 0xF4
+		return min(4, len(data))
+	return 0  # invalid
+
+def utf_char_width(string):
+	"""Return the width (NARROW or WIDE) of the single character in string."""
+	# XXX: crude heuristic, every code point from 0x1100 upwards is
+	# treated as double-width.
+	u = _utf_char_to_int(string)
+	if u < 0x1100:
+		return NARROW
+	return WIDE
+
+def _utf_char_to_int(string):
+	"""Return the code point encoded by the bytes of one UTF-8 character.
+
+	Returns 0 for an empty string.
+	"""
+	data = _utf_bytes(string)
+	if not data:
+		return 0
+	lead = data[0]
+	# The lead byte carries fewer payload bits the longer the sequence
+	# is, so it needs its own mask; only the following bytes hold 6 bits.
+	if lead <= 0b01111111:
+		u = lead
+	elif lead <= 0b10111111:
+		u = lead & 0b00111111
+	elif lead <= 0b11011111:
+		u = lead & 0b00011111
+	elif lead <= 0b11101111:
+		u = lead & 0b00001111
+	else:
+		u = lead & 0b00000111
+	for c in data[1:]:
+		u = (u << 6) | (c & 0b00111111)
+	return u
+
+def uwid(string):
+	"""Return the total width of string, summing NARROW/WIDE per character.
+
+	A byte that cannot start a character counts as width 1.
+	"""
+	data = _utf_bytes(string)
+	end = len(data)
+	i = 0
+	width = 0
+	while i < end:
+		# A character is at most 4 bytes long, so a 4-byte window is enough.
+		bytelen = utf_byte_length(data[i:i+4])
+		if bytelen:
+			width += utf_char_width(data[i:i+bytelen])
+		else:
+			# Invalid byte: step over it, otherwise i would never advance
+			# and the loop would not terminate.
+			width += 1
+			bytelen = 1
+		i += bytelen
+	return width
diff --git a/ranger/gui/bar.py b/ranger/gui/bar.py
index f5e34eb1..03ed2f78 100644
--- a/ranger/gui/bar.py
+++ b/ranger/gui/bar.py
@@ -13,6 +13,8 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+from ranger.ext.utfwidth import uwid
+
 class Bar(object):
 	left = None
 	right = None
@@ -132,7 +134,7 @@ class ColoredString(object):
 		self.string = self.string[:n]
 
 	def __len__(self):
-		return len(self.string)
+		return uwid(self.string)
 
 	def __str__(self):
 		return self.string
diff --git a/ranger/gui/widgets/console.py b/ranger/gui/widgets/console.py
index fa9e438e..30872639 100644
--- a/ranger/gui/widgets/console.py
+++ b/ranger/gui/widgets/console.py
@@ -27,6 +27,7 @@ from ranger.gui.widgets.console_mode import is_valid_mode, mode_to_class
 from ranger import log, relpath_conf
 from ranger.core.runner import ALLOWED_FLAGS
 from ranger.ext.shell_escape import shell_quote
+from ranger.ext.utfwidth import uwid
 from ranger.container.keymap import CommandArgs
 from ranger.ext.get_executables import get_executables
 from ranger.ext.direction import Direction
@@ -105,8 +106,8 @@ class Console(Widget):
 
 	def finalize(self):
 		try:
-			self.fm.ui.win.move(self.y,
-					self.x + min(self.wid-1, self.pos + len(self.prompt)))
+			xpos = uwid(self.line[0:self.pos]) + len(self.prompt)
+			self.fm.ui.win.move(self.y, self.x + min(self.wid-1, xpos))
 		except:
 			pass
 
diff --git a/test/tc_utfwidth.py b/test/tc_utfwidth.py
new file mode 100644
index 00000000..cf564990
--- /dev/null
+++ b/test/tc_utfwidth.py
@@ -0,0 +1,42 @@
+# -*- encoding: utf8 -*-
+# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+if __name__ == '__main__': from __init__ import init; init()
+
+from unittest import TestCase, main
+from ranger.ext.utfwidth import *
+
+a_ascii = "a"      # width = 1, bytes = 1
+a_umlaut = "ä"     # width = 1, bytes = 2
+a_katakana = "ア"  # width = 2, bytes = 3
+# need one with width = 1 & bytes = 3
+
+class Test(TestCase):
+	"""Checks for utf_byte_length() and uwid() from ranger.ext.utfwidth."""
+	def test_utf_byte_length(self):
+		# Pass the whole character rather than string[0]: on a byte string
+		# [0] is only the lead byte, and utf_byte_length() clamps its
+		# result to the number of bytes it was given.
+		self.assertEqual(1, utf_byte_length(a_ascii))
+		self.assertEqual(2, utf_byte_length(a_umlaut))
+		self.assertEqual(3, utf_byte_length(a_katakana))
+
+	def test_uwid(self):
+		# Expected values are widths: 1 per narrow and 2 per wide character.
+		self.assertEqual(1, uwid(a_ascii))
+		self.assertEqual(1, uwid(a_umlaut))
+		self.assertEqual(2, uwid(a_katakana))
+		self.assertEqual(3, uwid(a_katakana + a_umlaut))
+		self.assertEqual(4, uwid("asdf"))
+		self.assertEqual(5, uwid("löööl"))
+		self.assertEqual(6, uwid("バババ"))
+
+if __name__ == '__main__': main()
title='Blame the previous revision' href='/akspecs/ranger/blame/Makefile?id=fa0a2087880d9c5c6000eb4e583f31a0645ea5ad'>^
b0a216f5 ^

a0077554 ^













88b4d374 ^

a0077554 ^

2c5ea01d ^
582f3519 ^
b06433bc ^
a65a2695 ^

582f3519 ^
94c5d83e ^
dee6cfa6 ^
e9e4b4ff ^
b0a216f5 ^
76612b05 ^
ececd03e ^
b0a216f5 ^
e9e4b4ff ^





612b8b8b ^
e9e4b4ff ^
52403c53 ^
ad51cca4 ^
08e43b29 ^


ad51cca4 ^






b3d031a9 ^
c5f776ed ^
9ed36720 ^
b3d031a9 ^
bc2c5619 ^
c5f776ed ^

9ed36720 ^
b3d031a9 ^
9c69b4ae ^
c5f776ed ^

9ed36720 ^
9d82571b ^

5fca2a0b ^
9cf43c17 ^
5fca2a0b ^
9c69b4ae ^
c5f776ed ^

9ed36720 ^
c5f776ed ^
9c69b4ae ^
c5f776ed ^
88b4d374 ^
9ed36720 ^
9c69b4ae ^
88b4d374 ^
e91ae587 ^
9ed36720 ^
5f29f89a ^
9c69b4ae ^
e91ae587 ^
f2c8a7ff ^
9ed36720 ^
f2c8a7ff ^
9c69b4ae ^
f2c8a7ff ^
e416bfe7 ^
9ed36720 ^
5fca2a0b ^
6045dc10 ^
0ca79709 ^




6045dc10 ^
0ca79709 ^





c7720fff ^
8d21b83c ^


e9e4b4ff ^
ececd03e ^
e9e4b4ff ^
0c2c782d ^
636d9393 ^
b0a216f5 ^
c6afc196 ^

c0d63e78 ^


ececd03e ^
f2c8a7ff ^

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167