about summary refs log tree commit diff stats
path: root/ranger/ext/utfwidth.py
diff options
context:
space:
mode:
author hut <hut@lavabit.com> 2010-09-30 04:51:29 +0200
committer hut <hut@lavabit.com> 2010-09-30 04:51:29 +0200
commit 473daebb6870a54fe4b47529e77be698a2edfdb3 (patch)
tree c79a19c727de9ef91649561a0e2994b39b592f56 /ranger/ext/utfwidth.py
parent 8c8e7282b3b4238a3b7cf981d9e5715b11076419 (diff)
download ranger-473daebb6870a54fe4b47529e77be698a2edfdb3.tar.gz
Revert 5 commits concerning utf (due to very poor performance)
This reverts commits:
8c8e7282b3b4238a3b7cf981d9e5715b11076419
5cb67eeb96d337b55deea20131fc44a3d5447251
512f386be8753775ec824a6d9cbaf6527d50eda4
d4900452fca51685349966d527d173fdefe83f08
e6dda13a71168f9ec4a1e4844edad5a3257803e9
Diffstat (limited to 'ranger/ext/utfwidth.py')
-rw-r--r-- ranger/ext/utfwidth.py 121
1 files changed, 40 insertions, 81 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
index 5c850607..a506c676 100644
--- a/ranger/ext/utfwidth.py
+++ b/ranger/ext/utfwidth.py
@@ -18,80 +18,49 @@
 # ----
 # This file contains portions of code from cmus (uchar.c).
 
-"""
-This module provides functions that operate with the width of characters
-and strings rather than characters or bytes.
-"""
-
-import sys
-
 NARROW = 1
 WIDE = 2
 
-if sys.version > '3':
-	def uwid(string, count=-1):
-		"""Return the width of a string"""
-		width = 0
-		for c in string:
-			width += utf_char_width(c)
-			count -= 1
-			if not count:
-				break
-		return width
-
-	def uchars(string):
-		"""Return a list with one string for each character"""
-		return list(string)
+def uwid(string):
+	"""Return the width of a string"""
+	end = len(string)
+	i = 0
+	width = 0
+	while i < end:
+		bytelen = utf_byte_length(string[i:])
+		width += utf_char_width(string[i:i+bytelen])
+		i += bytelen
+	return width
 
-	utf_ord = ord
-else:
-	def uwid(string, count=-1):
-		"""Return the width of a string"""
-		end = len(string)
-		i = 0
-		width = 0
-		while i < end and count:
-			bytelen = _utf_byte_length(string[i:])
-			width += utf_char_width(string[i:i+bytelen])
-			i += bytelen
-			count -= 1
-		return width
+def uchars(string):
+	"""Return a list with one string for each character"""
+	end = len(string)
+	i = 0
+	result = []
+	while i < end:
+		bytelen = utf_byte_length(string[i:])
+		result.append(string[i:i+bytelen])
+		i += bytelen
+	return result
 
-	def uchars(string):
-		"""Return a list with one string for each character"""
-		end = len(string)
-		i = 0
-		result = []
-		while i < end:
-			bytelen = _utf_byte_length(string[i:])
-			result.append(string[i:i+bytelen])
-			i += bytelen
-		return result
-
-	def _utf_byte_length(string):
-		"""Return the byte length of one utf character"""
-		firstord = ord(string[0])
-		if firstord < 0b01111111:
-			return 1
-		if firstord < 0b10111111:
-			return 1  # invalid
-		if firstord < 0b11011111:
-			return 2
-		if firstord < 0b11101111:
-			return 3
-		if firstord < 0b11110100:
-			return 4
+def utf_byte_length(string):
+	"""Return the byte length of one utf character"""
+	firstord = ord(string[0])
+	if firstord < 0b01111111:
+		return 1
+	if firstord < 0b10111111:
 		return 1  # invalid
-
-	def utf_ord(char):
-		value = 0
-		for byte in char:
-			value = (value << 6) | (ord(byte) & 0b00111111)
-		return value
+	if firstord < 0b11011111:
+		return 2
+	if firstord < 0b11101111:
+		return 3
+	if firstord < 0b11110100:
+		return 4
+	return 1  # invalid
 
 def utf_char_width(string):
 	"""Return the width of a single character"""
-	u = utf_ord(string)
+	u = _utf_char_to_int(string)
 	if u < 0x1100:
 		return NARROW
 	# Hangul Jamo init. constonants
@@ -132,19 +101,9 @@ def utf_char_width(string):
 		return WIDE
 	return NARROW  # invalid (?)
 
-def uslice(string, start=0, end=1000000000):
-	"""
-	Returns a sliced string.
-
-	Works like string[start:end] except that one step represents
-	one narrow character in a monospaced character grid.
-	"""
-	chars = []
-	for c in uchars(string):
-		c_wid = utf_char_width(c)
-		if c_wid == NARROW:
-			chars.append(c)
-		elif c_wid == WIDE:
-			chars.append("")
-			chars.append(c)
-	return "".join(chars[start:end])
+def _utf_char_to_int(string):
+	# Squash the last 6 bits of each byte together to an integer
+	u = 0
+	for c in string:
+		u = (u << 6) | (ord(c) & 0b00111111)
+	return u