diff options
author | hut <hut@lavabit.com> | 2010-09-30 04:41:24 +0200 |
---|---|---|
committer | hut <hut@lavabit.com> | 2010-09-30 04:41:24 +0200 |
commit | 8c8e7282b3b4238a3b7cf981d9e5715b11076419 (patch) | |
tree | bbf15239c0679b495d0a9d6388f02477643be448 /ranger/ext/utfwidth.py | |
parent | 5cb67eeb96d337b55deea20131fc44a3d5447251 (diff) | |
download | ranger-8c8e7282b3b4238a3b7cf981d9e5715b11076419.tar.gz |
ext.utfwidth: Compatibility with both py2.6 and py3.1
Diffstat (limited to 'ranger/ext/utfwidth.py')
-rw-r--r-- | ranger/ext/utfwidth.py | 123 |
1 files changed, 82 insertions, 41 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
index 762f3894..5c850607 100644
--- a/ranger/ext/utfwidth.py
+++ b/ranger/ext/utfwidth.py
@@ -18,56 +18,80 @@
 # ----
 # This file contains portions of code from cmus (uchar.c).

-try:
-    from sys import maxint
-except:
-    from sys import maxsize as maxint
+"""
+This module provides functions that operate with the width of characters
+and strings rather than characters or bytes.
+"""
+
+import sys

 NARROW = 1
 WIDE = 2

-def uwid(string, count=maxint):
-    """Return the width of a string"""
-    try:
-        string = string.decode('utf8', 'replace')
-    except AttributeError:
-        pass
-    width = 0
-    for c in string:
-        width += utf_char_width(c)
-        count -= 1
-        if not count:
-            break
-    return width
+if sys.version > '3':
+    def uwid(string, count=-1):
+        """Return the width of a string"""
+        width = 0
+        for c in string:
+            width += utf_char_width(c)
+            count -= 1
+            if not count:
+                break
+        return width

-def uchars(string):
-    """Return a list with one string for each character"""
-    try:
-        string = string.decode('utf-8', 'replace')
-    except AttributeError:
-        pass
-    return list(string)
-    result = []
-    while i < end:
-        bytelen = utf_byte_length(string[i:])
-        result.append(string[i:i+bytelen])
-        i += bytelen
-    return result
+    def uchars(string):
+        """Return a list with one string for each character"""
+        return list(string)

-def uwidslice(string, start=0, end=maxint):
-    chars = []
-    for c in uchars(string):
-        c_wid = utf_char_width(c)
-        if c_wid == NARROW:
-            chars.append(c)
-        elif c_wid == WIDE:
-            chars.append("")
-            chars.append(c)
-    return "".join(chars[start:end])
+    utf_ord = ord
+else:
+    def uwid(string, count=-1):
+        """Return the width of a string"""
+        end = len(string)
+        i = 0
+        width = 0
+        while i < end and count:
+            bytelen = _utf_byte_length(string[i:])
+            width += utf_char_width(string[i:i+bytelen])
+            i += bytelen
+            count -= 1
+        return width
+
+    def uchars(string):
+        """Return a list with one string for each character"""
+        end = len(string)
+        i = 0
+        result = []
+        while i < end:
+            bytelen = _utf_byte_length(string[i:])
+            result.append(string[i:i+bytelen])
+            i += bytelen
+        return result
+
+    def _utf_byte_length(string):
+        """Return the byte length of one utf character"""
+        firstord = ord(string[0])
+        if firstord < 0b01111111:
+            return 1
+        if firstord < 0b10111111:
+            return 1 # invalid
+        if firstord < 0b11011111:
+            return 2
+        if firstord < 0b11101111:
+            return 3
+        if firstord < 0b11110100:
+            return 4
+        return 1 # invalid
+
+    def utf_ord(char):
+        value = 0
+        for byte in char:
+            value = (value << 6) | (ord(byte) & 0b00111111)
+        return value

 def utf_char_width(string):
     """Return the width of a single character"""
-    u = ord(string)
+    u = utf_ord(string)
     if u < 0x1100:
         return NARROW
     # Hangul Jamo init. constonants
@@ -107,3 +131,20 @@ def utf_char_width(string):
     if u >= 0x30000 and u <= 0x3FFFD:
         return WIDE
     return NARROW # invalid (?)
+
+def uslice(string, start=0, end=1000000000):
+    """
+    Returns a sliced string.
+
+    Works like string[start:end] except that one step represents
+    one narrow character in a monospaced character grid.
+    """
+    chars = []
+    for c in uchars(string):
+        c_wid = utf_char_width(c)
+        if c_wid == NARROW:
+            chars.append(c)
+        elif c_wid == WIDE:
+            chars.append("")
+            chars.append(c)
+    return "".join(chars[start:end])