From 8c8e7282b3b4238a3b7cf981d9e5715b11076419 Mon Sep 17 00:00:00 2001 From: hut Date: Thu, 30 Sep 2010 04:41:24 +0200 Subject: ext.utfwidth: Compatibility with both py2.6 and py3.1 --- ranger/ext/utfwidth.py | 123 ++++++++++++++++++++++++------------ ranger/gui/bar.py | 6 +- ranger/gui/curses_shortcuts.py | 7 +- ranger/gui/widgets/browsercolumn.py | 8 +-- 4 files changed, 94 insertions(+), 50 deletions(-) diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py index 762f3894..5c850607 100644 --- a/ranger/ext/utfwidth.py +++ b/ranger/ext/utfwidth.py @@ -18,56 +18,80 @@ # ---- # This file contains portions of code from cmus (uchar.c). -try: - from sys import maxint -except: - from sys import maxsize as maxint +""" +This module provides functions that operate with the width of characters +and strings rather than characters or bytes. +""" + +import sys NARROW = 1 WIDE = 2 -def uwid(string, count=maxint): - """Return the width of a string""" - try: - string = string.decode('utf8', 'replace') - except AttributeError: - pass - width = 0 - for c in string: - width += utf_char_width(c) - count -= 1 - if not count: - break - return width +if sys.version > '3': + def uwid(string, count=-1): + """Return the width of a string""" + width = 0 + for c in string: + width += utf_char_width(c) + count -= 1 + if not count: + break + return width -def uchars(string): - """Return a list with one string for each character""" - try: - string = string.decode('utf-8', 'replace') - except AttributeError: - pass - return list(string) - result = [] - while i < end: - bytelen = utf_byte_length(string[i:]) - result.append(string[i:i+bytelen]) - i += bytelen - return result + def uchars(string): + """Return a list with one string for each character""" + return list(string) -def uwidslice(string, start=0, end=maxint): - chars = [] - for c in uchars(string): - c_wid = utf_char_width(c) - if c_wid == NARROW: - chars.append(c) - elif c_wid == WIDE: - chars.append("") - chars.append(c) - return "".join(chars[start:end]) + utf_ord = ord +else: + def uwid(string, count=-1): + """Return the width of a string""" + end = len(string) + i = 0 + width = 0 + while i < end and count: + bytelen = _utf_byte_length(string[i:]) + width += utf_char_width(string[i:i+bytelen]) + i += bytelen + count -= 1 + return width + + def uchars(string): + """Return a list with one string for each character""" + end = len(string) + i = 0 + result = [] + while i < end: + bytelen = _utf_byte_length(string[i:]) + result.append(string[i:i+bytelen]) + i += bytelen + return result + + def _utf_byte_length(string): + """Return the byte length of one utf character""" + firstord = ord(string[0]) + if firstord < 0b01111111: + return 1 + if firstord < 0b10111111: + return 1 # invalid + if firstord < 0b11011111: + return 2 + if firstord < 0b11101111: + return 3 + if firstord < 0b11110100: + return 4 + return 1 # invalid + + def utf_ord(char): + value = 0 + for byte in char: + value = (value << 6) | (ord(byte) & 0b00111111) + return value def utf_char_width(string): """Return the width of a single character""" - u = ord(string) + u = utf_ord(string) if u < 0x1100: return NARROW # Hangul Jamo init. constonants @@ -107,3 +131,20 @@ def utf_char_width(string): if u >= 0x30000 and u <= 0x3FFFD: return WIDE return NARROW # invalid (?) + +def uslice(string, start=0, end=1000000000): + """ + Returns a sliced string. + + Works like string[start:end] except that one step represents + one narrow character in a monospaced character grid. + """ + chars = [] + for c in uchars(string): + c_wid = utf_char_width(c) + if c_wid == NARROW: + chars.append(c) + elif c_wid == WIDE: + chars.append("") + chars.append(c) + return "".join(chars[start:end]) diff --git a/ranger/gui/bar.py b/ranger/gui/bar.py index 56a9d97f..42e1f1c4 100644 --- a/ranger/gui/bar.py +++ b/ranger/gui/bar.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from ranger.ext.utfwidth import uwid, uwidslice, utf_char_width +from ranger.ext.utfwidth import uwid, uslice, utf_char_width class Bar(object): left = None @@ -133,10 +133,10 @@ class ColoredString(object): def cut_off(self, n): if n >= 1: - self.string = uwidslice(self.string, 0, -n) + self.string = uslice(self.string, 0, -n) def cut_off_to(self, n): - self.string = uwidslice(self.string, 0, n) + self.string = uslice(self.string, 0, n) def __len__(self): return uwid(self.string) diff --git a/ranger/gui/curses_shortcuts.py b/ranger/gui/curses_shortcuts.py index 006ea4d0..e5683b66 100644 --- a/ranger/gui/curses_shortcuts.py +++ b/ranger/gui/curses_shortcuts.py @@ -50,8 +50,11 @@ class CursesShortcuts(SettingsAware): except (_curses.error, TypeError): pass except UnicodeEncodeError: - function(*(obj.encode('utf8') if hasattr(obj, 'encode') \ - else obj for obj in args)) + try: + function(*(obj.encode('utf8') if hasattr(obj, 'encode') \ + else obj for obj in args)) + except (_curses.error, TypeError): + pass def addstr(self, *args): self._addxyz_wrapper(self.win.addstr, args) diff --git a/ranger/gui/widgets/browsercolumn.py b/ranger/gui/widgets/browsercolumn.py index d617e64e..6021d622 100644 --- a/ranger/gui/widgets/browsercolumn.py +++ b/ranger/gui/widgets/browsercolumn.py @@ -20,6 +20,7 @@ from time import time from . import Widget from .pager import Pager from ranger.fsobject import BAD_INFO +from ranger.ext.utfwidth import uslice class BrowserColumn(Pager): main_column = False @@ -248,14 +249,13 @@ class BrowserColumn(Pager): this_color.append('link') this_color.append(drawn.exists and 'good' or 'bad') - string = drawn.basename if self.main_column: if tagged: - self.addnstr(line, 0, text, self.wid - 2) + self.addstr(line, 0, uslice(text, 0, self.wid - 2)) elif self.wid > 1: - self.addnstr(line, 1, text, self.wid - 2) + self.addstr(line, 1, uslice(text, 0, self.wid - 2)) else: - self.addnstr(line, 0, text, self.wid) + self.addstr(line, 0, uslice(text, 0, self.wid)) if self.display_infostring and drawn.infostring \ and self.settings.display_size_in_main_column: -- cgit 1.4.1-2-gfad0