diff options
author | hut <hut@lavabit.com> | 2010-05-17 20:12:35 +0200 |
---|---|---|
committer | hut <hut@lavabit.com> | 2010-05-17 20:12:35 +0200 |
commit | c184e88b5caa66e1cda90019e6e74e0036a24959 (patch) | |
tree | 58cdbaee5a54f9a039396faf9789d242afd47ec8 /ranger/ext | |
parent | ce421875249b2ce9c6eacbade9b750e46ae80a20 (diff) | |
parent | d213c6b6eb252df03e4fbbe97e7e71876da8c309 (diff) | |
download | ranger-c184e88b5caa66e1cda90019e6e74e0036a24959.tar.gz |
Merge branch 'utf'
Conflicts: ranger/__main__.py
Diffstat (limited to 'ranger/ext')
-rw-r--r-- | ranger/ext/utfwidth.py | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py new file mode 100644 index 00000000..a506c676 --- /dev/null +++ b/ranger/ext/utfwidth.py @@ -0,0 +1,109 @@ +# -*- encoding: utf8 -*- +# Copyright (C) 2009, 2010 Roman Zimbelmann <romanz@lavabit.com> +# Copyright (C) 2004, 2005 Timo Hirvonen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# ---- +# This file contains portions of code from cmus (uchar.c). 
# Display widths, in terminal cells, returned by utf_char_width().
NARROW = 1
WIDE = 2

# Payload bits carried by the first byte of a UTF-8 sequence, keyed by the
# sequence length that utf_byte_length() reports for that byte.
_LEAD_BYTE_MASKS = {1: 0x7F, 2: 0x1F, 3: 0x0F, 4: 0x07}


def _is_text(string):
    """Return True if *string* holds decoded code points, not UTF-8 bytes.

    On Python 2 a plain ``str`` is a byte string (``bytes is str``), so it
    takes the byte path and only ``unicode`` counts as text.  On Python 3
    ``str`` is text and ``bytes``/``bytearray`` take the byte path.
    """
    return not isinstance(string, (bytes, bytearray))


def uwid(string):
    """Return the display width of a string.

    *string* is either a UTF-8 encoded byte string or decoded text.  The
    result is the sum of utf_char_width() over all of its characters, so an
    empty string has width 0.
    """
    return sum(utf_char_width(char) for char in uchars(string))


def uchars(string):
    """Return a list with one string for each character.

    For a UTF-8 byte string every list item is the byte sequence of one
    character; for decoded text every item is one code point.  Malformed
    bytes are never dropped: an invalid lead byte becomes an item of its
    own and a sequence cut off at the end of the input is returned as is.
    """
    if _is_text(string):
        return list(string)
    chars = []
    pos = 0
    end = len(string)
    while pos < end:
        # Only the lead byte is inspected, so hand over a one-byte slice
        # rather than copying the whole remainder on every iteration.
        step = utf_byte_length(string[pos:pos + 1])
        chars.append(string[pos:pos + step])
        pos += step
    return chars


def utf_byte_length(string):
    """Return the byte length of one utf character.

    Only the first byte of *string* is inspected, and the result is the
    number of leading items of *string* that form its first character:

    * UTF-8 byte string: 1 to 4 depending on the lead byte.  A byte that
      cannot start a sequence (a continuation byte 0x80-0xBF, or anything
      above 0xF4) counts as a one-byte invalid character.
    * decoded text: always 1, because every item is already a complete
      code point.

    Raises IndexError if *string* is empty.
    """
    if not string:
        raise IndexError("utf_byte_length() requires a non-empty string")
    if _is_text(string):
        return 1
    # bytearray yields an integer on both Python 2 and Python 3.
    lead = bytearray(string[:1])[0]
    if lead <= 0x7F:    # 0xxxxxxx: ASCII
        return 1
    if lead <= 0xBF:    # 10xxxxxx: stray continuation byte (invalid)
        return 1
    if lead <= 0xDF:    # 110xxxxx
        return 2
    if lead <= 0xEF:    # 1110xxxx
        return 3
    if lead <= 0xF4:    # 11110xxx, capped at U+10FFFF
        return 4
    return 1            # invalid


def utf_char_width(string):
    """Return the width of a single character.

    *string* holds exactly one character, either as its UTF-8 byte sequence
    or as a single decoded code point.  The result is WIDE for the East
    Asian ranges listed below and NARROW for everything else, including
    empty or malformed input.
    """
    u = _utf_char_to_int(string)
    if u < 0x1100:
        return NARROW
    # Hangul Jamo initial consonants
    if u <= 0x115F:
        return WIDE
    # Angle brackets
    if u in (0x2329, 0x232A):
        return WIDE
    if u < 0x2E80:
        return NARROW
    # CJK ... Yi
    if u < 0x302A:
        return WIDE
    # Narrow exceptions inside the CJK ... Yi range
    if u <= 0x302F:
        return NARROW
    if u in (0x303F, 0x3099, 0x309A):
        return NARROW
    # CJK ... Yi (continued)
    if u <= 0xA4CF:
        return WIDE
    # Hangul Syllables
    if 0xAC00 <= u <= 0xD7A3:
        return WIDE
    # CJK Compatibility Ideographs
    if 0xF900 <= u <= 0xFAFF:
        return WIDE
    # CJK Compatibility Forms
    if 0xFE30 <= u <= 0xFE6F:
        return WIDE
    # Fullwidth Forms
    if 0xFF00 <= u <= 0xFF60 or 0xFFE0 <= u <= 0xFFE6:
        return WIDE
    # Plane 2 (supplementary ideographs)
    if 0x20000 <= u <= 0x2FFFD:
        return WIDE
    # Plane 3
    if 0x30000 <= u <= 0x3FFFD:
        return WIDE
    return NARROW  # everything else, including invalid input


def _utf_char_to_int(string):
    """Return the code point of the single character in *string*.

    Decoded text yields the ordinal of its first code point.  For a UTF-8
    byte string the length marker is stripped from the lead byte and the
    low six bits of each following byte are appended.  An empty string
    yields 0, and bytes beyond the length announced by the lead byte are
    ignored.
    """
    if not string:
        return 0
    if _is_text(string):
        return ord(string[:1])
    data = bytearray(string)
    length = utf_byte_length(string)
    # Drop the 0/110/1110/11110 prefix so that only payload bits remain;
    # masking the lead byte with 0x3F would leak marker bits into the
    # result for three- and four-byte sequences.
    code = data[0] & _LEAD_BYTE_MASKS[length]
    for byte in data[1:length]:
        code = (code << 6) | (byte & 0x3F)
    return code