diff options
-rw-r--r-- | ranger/ext/widestring.py | 154 |
1 files changed, 8 insertions, 146 deletions
diff --git a/ranger/ext/widestring.py b/ranger/ext/widestring.py index 150a142e..a35071b3 100644 --- a/ranger/ext/widestring.py +++ b/ranger/ext/widestring.py @@ -1,6 +1,5 @@ # -*- encoding: utf8 -*- # Copyright (C) 2009, 2010 Roman Zimbelmann <romanz@lavabit.com> -# Copyright (C) 2004, 2005 Timo Hirvonen # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,9 +13,6 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -# -# ---- -# This file contains portions of code from cmus (uchar.c). import sys from unicodedata import east_asian_width @@ -31,98 +27,13 @@ def uwid(string): if not PY3: string = string.decode('utf-8', 'ignore') return sum(utf_char_width(c) for c in string) -# end = len(string) -# i = 0 -# width = 0 -# while i < end: -# bytelen = utf_byte_length(string[i:]) -# width += utf_char_width(string[i:i+bytelen]) -# i += bytelen -# return width + def uchars(string): + """Return a list of characters in a string""" if not PY3: string = string.decode('utf-8', 'ignore') return list(string) - #end = len(string) - #i = 0 - #result = [] - #while i < end: - #bytelen = utf_byte_length(string[i:]) - #result.append(string[i:i+bytelen]) - #i += bytelen - #return result - -def _utf_char_to_int(string): - # Squash the last 6 bits of each byte together to an integer - if sys.version > '3': - return ord(string) - else: - # THIS CODE IS INCORRECT - u = 0 - for c in string: - u = (u << 6) | (ord(c) & 0b00111111) - return u - -def utf_char_width_(u): - if u < 0x1100: - return NARROW - # Hangul Jamo init. constonants - if u <= 0x115F: - return WIDE - # Angle Brackets - if u == 0x2329 or u == 0x232A: - return WIDE - if u < 0x2e80: - return NARROW - # CJK ... Yi - if u < 0x302A: - return WIDE - if u <= 0x302F: - return NARROW - if u == 0x303F or u == 0x3099 or u == 0x309a: - return NARROW - # CJK ... Yi - if u <= 0xA4CF: - return WIDE - # Hangul Syllables - if u >= 0xAC00 and u <= 0xD7A3: - return WIDE - # CJK Compatibility Ideographs - if u >= 0xF900 and u <= 0xFAFF: - return WIDE - # CJK Compatibility Forms - if u >= 0xFE30 and u <= 0xFE6F: - return WIDE - # Fullwidth Forms - if u >= 0xFF00 and u <= 0xFF60 or u >= 0xFFE0 and u <= 0xFFE6: - return WIDE - # CJK Extra Stuff - if u >= 0x20000 and u <= 0x2FFFD: - return WIDE - # ? - if u >= 0x30000 and u <= 0x3FFFD: - return WIDE - return NARROW # invalid (?) - - -def utf_byte_length(string): - """Return the byte length of one utf character""" - if sys.version >= '3': - firstord = string.encode("utf-8")[0] - else: - firstord = ord(string[0]) - if firstord < 0b01111111: - return 1 - if firstord < 0b10111111: - return 1 # invalid - if firstord < 0b11011111: - return 2 - if firstord < 0b11101111: - return 3 - if firstord < 0b11110100: - return 4 - return 1 # invalid def utf_char_width(string): @@ -132,34 +43,16 @@ def utf_char_width(string): return NARROW -def width(string): - """Return the width of a string""" - end = len(string) - i = 0 - width = 0 - while i < end: - bytelen = utf_byte_length(string[i:]) - width += utf_char_width(string[i:i+bytelen]) - i += bytelen - return width - - def string_to_charlist(string): + """Return a list of characters with extra empty strings after wide chars""" if not set(string) - ASCIIONLY: return list(string) - end = len(string) - i = 0 + if not PY3: + string = string.decode('utf-8', 'ignore') result = [] - py3 = sys.version > '3' - while i < end: - if py3: - result.append(string[i:i+1]) - i += 1 - else: - bytelen = utf_byte_length(string[i:]) - result.append(string[i:i+bytelen]) - i += bytelen - if utf_char_width_(_utf_char_to_int(result[-1])) == WIDE: + for c in string: + result.append(c) + if east_asian_width(c)[0] == 'W': result.append('') return result @@ -204,37 +97,6 @@ class WideString(object): def __repr__(self): return '<' + self.__class__.__name__ + " '" + self.string + "'>" - #def __getslice__(self, a, z): - #""" - #>>> WideString("asdf")[1:3] - #<WideString 'sd'> - #>>> WideString("モヒカン")[2:4] - #<WideString 'ヒ'> - #>>> WideString("モヒカン")[2:5] - #<WideString 'ヒ '> - #>>> WideString("モヒカン")[1:5] - #<WideString ' ヒ '> - #>>> WideString("モヒカン")[:] - #<WideString 'モヒカン'> - #>>> WideString("asdfモ")[0:6] - #<WideString 'asdfモ'> - #>>> WideString("asdfモ")[0:5] - #<WideString 'asdf '> - #>>> WideString("asdfモ")[0:4] - #<WideString 'asdf'> - #""" - #if z is None or z >= len(self.chars): - #z = len(self.chars) - 1 - #if a is None or a < 0: - #a = 0 - #if z < len(self.chars) - 1 and self.chars[z] == '': - #if self.chars[a] == '': - #return WideString(' ' + ''.join(self.chars[a:z - 1]) + ' ') - #return WideString(''.join(self.chars[a:z - 1]) + ' ') - #if self.chars[a] == '': - #return WideString(' ' + ''.join(self.chars[a:z - 1])) - #return WideString(''.join(self.chars[a:z])) - def __getslice__(self, a, z): """ >>> WideString("asdf")[1:3] |