diff options
author | hut <hut@lavabit.com> | 2010-09-30 04:51:29 +0200 |
---|---|---|
committer | hut <hut@lavabit.com> | 2010-09-30 04:51:29 +0200 |
commit | 473daebb6870a54fe4b47529e77be698a2edfdb3 (patch) | |
tree | c79a19c727de9ef91649561a0e2994b39b592f56 /ranger/ext/utfwidth.py | |
parent | 8c8e7282b3b4238a3b7cf981d9e5715b11076419 (diff) | |
download | ranger-473daebb6870a54fe4b47529e77be698a2edfdb3.tar.gz |
Revert 5 commits concerning utf (due to very poor performance)
This reverts commits: 8c8e7282b3b4238a3b7cf981d9e5715b11076419 5cb67eeb96d337b55deea20131fc44a3d5447251 512f386be8753775ec824a6d9cbaf6527d50eda4 d4900452fca51685349966d527d173fdefe83f08 e6dda13a71168f9ec4a1e4844edad5a3257803e9
Diffstat (limited to 'ranger/ext/utfwidth.py')
-rw-r--r-- | ranger/ext/utfwidth.py | 121 |
1 files changed, 40 insertions, 81 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py index 5c850607..a506c676 100644 --- a/ranger/ext/utfwidth.py +++ b/ranger/ext/utfwidth.py @@ -18,80 +18,49 @@ # ---- # This file contains portions of code from cmus (uchar.c). -""" -This module provides functions that operate with the width of characters -and strings rather than characters or bytes. -""" - -import sys - NARROW = 1 WIDE = 2 -if sys.version > '3': - def uwid(string, count=-1): - """Return the width of a string""" - width = 0 - for c in string: - width += utf_char_width(c) - count -= 1 - if not count: - break - return width - - def uchars(string): - """Return a list with one string for each character""" - return list(string) +def uwid(string): + """Return the width of a string""" + end = len(string) + i = 0 + width = 0 + while i < end: + bytelen = utf_byte_length(string[i:]) + width += utf_char_width(string[i:i+bytelen]) + i += bytelen + return width - utf_ord = ord -else: - def uwid(string, count=-1): - """Return the width of a string""" - end = len(string) - i = 0 - width = 0 - while i < end and count: - bytelen = _utf_byte_length(string[i:]) - width += utf_char_width(string[i:i+bytelen]) - i += bytelen - count -= 1 - return width +def uchars(string): + """Return a list with one string for each character""" + end = len(string) + i = 0 + result = [] + while i < end: + bytelen = utf_byte_length(string[i:]) + result.append(string[i:i+bytelen]) + i += bytelen + return result - def uchars(string): - """Return a list with one string for each character""" - end = len(string) - i = 0 - result = [] - while i < end: - bytelen = _utf_byte_length(string[i:]) - result.append(string[i:i+bytelen]) - i += bytelen - return result - - def _utf_byte_length(string): - """Return the byte length of one utf character""" - firstord = ord(string[0]) - if firstord < 0b01111111: - return 1 - if firstord < 0b10111111: - return 1 # invalid - if firstord < 0b11011111: - return 2 - if firstord < 0b11101111: - return 3 - if firstord < 0b11110100: - return 4 +def utf_byte_length(string): + """Return the byte length of one utf character""" + firstord = ord(string[0]) + if firstord < 0b01111111: + return 1 + if firstord < 0b10111111: return 1 # invalid - - def utf_ord(char): - value = 0 - for byte in char: - value = (value << 6) | (ord(byte) & 0b00111111) - return value + if firstord < 0b11011111: + return 2 + if firstord < 0b11101111: + return 3 + if firstord < 0b11110100: + return 4 + return 1 # invalid def utf_char_width(string): """Return the width of a single character""" - u = utf_ord(string) + u = _utf_char_to_int(string) if u < 0x1100: return NARROW # Hangul Jamo init. constonants @@ -132,19 +101,9 @@ def utf_char_width(string): return WIDE return NARROW # invalid (?) -def uslice(string, start=0, end=1000000000): - """ - Returns a sliced string. - - Works like string[start:end] except that one step represents - one narrow character in a monospaced character grid. - """ - chars = [] - for c in uchars(string): - c_wid = utf_char_width(c) - if c_wid == NARROW: - chars.append(c) - elif c_wid == WIDE: - chars.append("") - chars.append(c) - return "".join(chars[start:end]) +def _utf_char_to_int(string): + # Squash the last 6 bits of each byte together to an integer + u = 0 + for c in string: + u = (u << 6) | (ord(c) & 0b00111111) + return u |