diff options
author | hut <hut@lavabit.com> | 2010-05-04 23:29:54 +0200 |
---|---|---|
committer | hut <hut@lavabit.com> | 2010-05-10 21:04:47 +0200 |
commit | ccbe8b8d13ebdad09d282da51d118670a566cba5 (patch) | |
tree | 469773a104a0aeb0a995f5d2acd9f9a91fb635c0 | |
parent | 338bbba4a14a21c82a3d4849b075fddddad9cee9 (diff) | |
download | ranger-ccbe8b8d13ebdad09d282da51d118670a566cba5.tar.gz |
attempt to fix utf issues (wrong calculation of width)
-rw-r--r-- | ranger/ext/utfwidth.py | 64 | ||||
-rw-r--r-- | ranger/gui/bar.py | 4 | ||||
-rw-r--r-- | ranger/gui/widgets/console.py | 5 | ||||
-rw-r--r-- | test/tc_utfwidth.py | 42 |
4 files changed, 112 insertions, 3 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py new file mode 100644 index 00000000..bbc67deb --- /dev/null +++ b/ranger/ext/utfwidth.py @@ -0,0 +1,64 @@ +# -*- encoding: utf8 -*- +# Copyright (C) 2009, 2010 Roman Zimbelmann <romanz@lavabit.com> +# Copyright (C) 2004, 2005 Timo Hirvonen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# ---- +# This file contains portions of code from cmus (uchar.c). 
NARROW = 1
WIDE = 2

# Inclusive code point ranges that occupy two terminal columns.  These are
# the "wide" ranges of Markus Kuhn's wcwidth() (East Asian Wide/Fullwidth).
# Combining and zero-width characters are not special-cased here.
_WIDE_RANGES = (
    (0x1100, 0x115F),    # Hangul Jamo initial consonants
    (0x2329, 0x232A),    # angle brackets
    (0x2E80, 0x303E),    # CJK ... Yi (U+303F itself is narrow)
    (0x3040, 0xA4CF),
    (0xAC00, 0xD7A3),    # Hangul syllables
    (0xF900, 0xFAFF),    # CJK compatibility ideographs
    (0xFE10, 0xFE19),    # vertical forms
    (0xFE30, 0xFE6F),    # CJK compatibility forms
    (0xFF00, 0xFF60),    # fullwidth forms
    (0xFFE0, 0xFFE6),
    (0x20000, 0x2FFFD),
    (0x30000, 0x3FFFD),
)


def _byte_values(string):
    """Return `string' as a bytearray holding its UTF-8 byte values.

    Byte strings (Python 2 `str', Python 3 `bytes') are taken as already
    encoded.  Text strings are encoded first, so that one element of a
    Python 3 `str' (a whole code point) is not mistaken for a single byte.
    """
    if isinstance(string, (bytes, bytearray)):
        return bytearray(string)
    # 'replace' keeps unencodable code points (lone surrogates) from raising.
    return bytearray(string.encode('utf-8', 'replace'))


def _sequence_length(lead):
    """Return the UTF-8 sequence length announced by a lead byte value.

    Returns 0 if the byte cannot start a sequence (a continuation byte
    0x80-0xBF, or a value above 0xF4).
    """
    if lead < 0x80:      # 0xxxxxxx
        return 1
    if lead < 0xC0:      # 10xxxxxx: continuation byte, not a lead byte
        return 0
    if lead < 0xE0:      # 110xxxxx
        return 2
    if lead < 0xF0:      # 1110xxxx
        return 3
    if lead < 0xF5:      # 11110xxx, limited to U+10FFFF
        return 4
    return 0


def _code_point_width(u):
    """Return WIDE if code point `u' lies in _WIDE_RANGES, else NARROW."""
    for first, last in _WIDE_RANGES:
        if first <= u <= last:
            return WIDE
    return NARROW


def utf_byte_length(string):
    """Return the byte length of one utf character

    Only the first character of `string' is examined.  The result is
    clamped to the number of bytes actually available, and is 0 if the
    string is empty or does not start with a valid lead byte.
    """
    data = _byte_values(string)
    if not data:
        return 0
    return min(_sequence_length(data[0]), len(data))


def utf_char_width(string):
    """Return the display width (NARROW or WIDE) of one utf character"""
    return _code_point_width(_utf_char_to_int(string))


def _utf_char_to_int(string):
    """Return the code point of the first utf character in `string'.

    An empty string yields 0; an invalid lead byte yields its own value.
    """
    data = _byte_values(string)
    if not data:
        return 0
    lead = data[0]
    length = _sequence_length(lead)
    if length <= 1:
        return lead
    # Strip the length marker bits from the lead byte (110xxxxx keeps 5
    # payload bits, 1110xxxx keeps 4, 11110xxx keeps 3) ...
    u = lead & (0xFF >> (length + 1))
    # ... then append the low 6 bits of every continuation byte.
    for byte in data[1:length]:
        u = (u << 6) | (byte & 0x3F)
    return u


def uwid(string):
    """Return the number of terminal columns needed to display `string'.

    Accepts UTF-8 byte strings as well as text strings.  Every invalid
    byte counts as one column.
    """
    data = _byte_values(string)
    end = len(data)
    i = 0
    width = 0
    while i < end:
        bytelen = min(_sequence_length(data[i]), end - i)
        if bytelen:
            width += utf_char_width(data[i:i + bytelen])
            i += bytelen
        else:
            # Invalid byte: count one column and step over it, otherwise
            # the loop would never advance.
            width += 1
            i += 1
    return width

# Remainder of the original line, kept verbatim (header and leading context
# of the next file in this patch):
# diff --git a/ranger/gui/bar.py b/ranger/gui/bar.py index f5e34eb1..03ed2f78 100644 --- a/ranger/gui/bar.py +++ b/ranger/gui/bar.py @@ -13,6 +13,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>.
+from ranger.ext.utfwidth import uwid + class Bar(object): left = None right = None @@ -132,7 +134,7 @@ class ColoredString(object): self.string = self.string[:n] def __len__(self): - return len(self.string) + return uwid(self.string) def __str__(self): return self.string diff --git a/ranger/gui/widgets/console.py b/ranger/gui/widgets/console.py index fa9e438e..30872639 100644 --- a/ranger/gui/widgets/console.py +++ b/ranger/gui/widgets/console.py @@ -27,6 +27,7 @@ from ranger.gui.widgets.console_mode import is_valid_mode, mode_to_class from ranger import log, relpath_conf from ranger.core.runner import ALLOWED_FLAGS from ranger.ext.shell_escape import shell_quote +from ranger.ext.utfwidth import uwid from ranger.container.keymap import CommandArgs from ranger.ext.get_executables import get_executables from ranger.ext.direction import Direction @@ -105,8 +106,8 @@ class Console(Widget): def finalize(self): try: - self.fm.ui.win.move(self.y, - self.x + min(self.wid-1, self.pos + len(self.prompt))) + xpos = uwid(self.line[0:self.pos]) + len(self.prompt) + self.fm.ui.win.move(self.y, self.x + min(self.wid-1, xpos)) except: pass diff --git a/test/tc_utfwidth.py b/test/tc_utfwidth.py new file mode 100644 index 00000000..cf564990 --- /dev/null +++ b/test/tc_utfwidth.py @@ -0,0 +1,42 @@ +# -*- encoding: utf8 -*- +# Copyright (C) 2009, 2010 Roman Zimbelmann <romanz@lavabit.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
# If not, see <http://www.gnu.org/licenses/>.

# Unit tests for the display-width helpers in ranger.ext.utfwidth.

if __name__ == '__main__':
    # Initialise the test environment before importing from ranger.
    from __init__ import init
    init()

from unittest import TestCase, main
from ranger.ext.utfwidth import *

# Sample characters used by the tests below.
a_ascii = "a"  # width = 1, bytes = 1
a_umlaut = "ä"  # width = 1, bytes = 2
a_katakana = "ア"  # width = 2, bytes = 3
# need one with width = 1 & bytes = 3


class Test(TestCase):
    """Checks for utf_byte_length() and uwid()."""

    def test_utf_byte_length(self):
        """The first element of each sample reports the expected length."""
        expectations = (
            (1, a_ascii),
            (2, a_umlaut),
            (3, a_katakana),
        )
        for byte_count, sample in expectations:
            self.assertEqual(byte_count, utf_byte_length(sample[0]))

    def test_uwid(self):
        """Whole strings report the expected number of columns."""
        expectations = (
            (1, a_ascii),
            (1, a_umlaut),
            (2, a_katakana),
            (3, a_katakana + a_umlaut),
            (4, "asdf"),
            (5, "löööl"),
            (6, "バババ"),
        )
        for columns, sample in expectations:
            self.assertEqual(columns, uwid(sample))


if __name__ == '__main__':
    main()