diff options
author | hut <hut@lavabit.com> | 2010-09-30 01:08:40 +0200 |
---|---|---|
committer | hut <hut@lavabit.com> | 2010-09-30 01:08:40 +0200 |
commit | 512f386be8753775ec824a6d9cbaf6527d50eda4 (patch) | |
tree | 1799f231efa6df1b49602a142b6c3378ff8d6967 | |
parent | d4900452fca51685349966d527d173fdefe83f08 (diff) | |
download | ranger-512f386be8753775ec824a6d9cbaf6527d50eda4.tar.gz |
ext.utfwidth: updated algorithms
-rw-r--r-- | ranger/ext/utfwidth.py | 45 | ||||
-rw-r--r-- | ranger/gui/curses_shortcuts.py | 9 | ||||
-rw-r--r-- | test/tc_utfwidth.py | 5 |
3 files changed, 20 insertions, 39 deletions
```diff
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
index 364db757..762f3894 100644
--- a/ranger/ext/utfwidth.py
+++ b/ranger/ext/utfwidth.py
@@ -28,20 +28,25 @@ WIDE = 2
 
 def uwid(string, count=maxint):
     """Return the width of a string"""
-    end = len(string)
-    i = 0
+    try:
+        string = string.decode('utf8', 'replace')
+    except AttributeError:
+        pass
     width = 0
-    while i < end and count:
-        bytelen = utf_byte_length(string[i:])
-        width += utf_char_width(string[i:i+bytelen])
-        i += bytelen
+    for c in string:
+        width += utf_char_width(c)
         count -= 1
+        if not count:
+            break
     return width
 
 def uchars(string):
     """Return a list with one string for each character"""
-    end = len(string)
-    i = 0
+    try:
+        string = string.decode('utf-8', 'replace')
+    except AttributeError:
+        pass
+    return list(string)
     result = []
     while i < end:
         bytelen = utf_byte_length(string[i:])
@@ -60,24 +65,9 @@ def uwidslice(string, start=0, end=maxint):
             chars.append(c)
     return "".join(chars[start:end])
 
-def utf_byte_length(string):
-    """Return the byte length of one utf character"""
-    firstord = ord(string[0])
-    if firstord < 0b01111111:
-        return 1
-    if firstord < 0b10111111:
-        return 1 # invalid
-    if firstord < 0b11011111:
-        return 2
-    if firstord < 0b11101111:
-        return 3
-    if firstord < 0b11110100:
-        return 4
-    return 1 # invalid
-
 def utf_char_width(string):
     """Return the width of a single character"""
-    u = _utf_char_to_int(string)
+    u = ord(string)
     if u < 0x1100:
         return NARROW
     # Hangul Jamo init. constonants
@@ -117,10 +107,3 @@ def utf_char_width(string):
     if u >= 0x30000 and u <= 0x3FFFD:
         return WIDE
     return NARROW # invalid (?)
-
-def _utf_char_to_int(string):
-    # Squash the last 6 bits of each byte together to an integer
-    u = 0
-    for c in string:
-        u = (u << 6) | (ord(c) & 0b00111111)
-    return u
diff --git a/ranger/gui/curses_shortcuts.py b/ranger/gui/curses_shortcuts.py
index 3df45700..65886d7e 100644
--- a/ranger/gui/curses_shortcuts.py
+++ b/ranger/gui/curses_shortcuts.py
@@ -51,9 +51,12 @@ class CursesShortcuts(SettingsAware):
             pass
         except UnicodeEncodeError:
             try:
-                self.win.addstr(*(ascii_only(obj) for obj in args))
-            except (_curses.error, TypeError):
-                pass
+                self.win.addstr(*(obj.encode('utf8') for obj in args))
+            except UnicodeEncodeError:
+                try:
+                    self.win.addstr(*(ascii_only(obj) for obj in args))
+                except (_curses.error, TypeError):
+                    pass
 
     def addnstr(self, *args):
         try:
diff --git a/test/tc_utfwidth.py b/test/tc_utfwidth.py
index 67ff609e..fba9f783 100644
--- a/test/tc_utfwidth.py
+++ b/test/tc_utfwidth.py
@@ -29,11 +29,6 @@ a_katakana = "ア" # width = 2, bytes = 3
 # need one with width = 1 & bytes = 3
 
 class Test(TestCase):
-    def test_utf_byte_length(self):
-        self.assertEqual(1, utf_byte_length(a_ascii))
-        self.assertEqual(2, utf_byte_length(a_umlaut))
-        self.assertEqual(3, utf_byte_length(a_katakana))
-
     def test_uwid(self):
         self.assertEqual(1, uwid(a_ascii))
         self.assertEqual(1, uwid(a_umlaut))
```