about summary refs log tree commit diff stats
path: root/ranger/ext/utfwidth.py
diff options
context:
space:
mode:
author hut <hut@lavabit.com> 2010-09-30 04:41:24 +0200
committer hut <hut@lavabit.com> 2010-09-30 04:41:24 +0200
commit 8c8e7282b3b4238a3b7cf981d9e5715b11076419 (patch)
tree bbf15239c0679b495d0a9d6388f02477643be448 /ranger/ext/utfwidth.py
parent 5cb67eeb96d337b55deea20131fc44a3d5447251 (diff)
download ranger-8c8e7282b3b4238a3b7cf981d9e5715b11076419.tar.gz
ext.utfwidth: Compatibility with both py2.6 and py3.1
Diffstat (limited to 'ranger/ext/utfwidth.py')
-rw-r--r-- ranger/ext/utfwidth.py 123
1 files changed, 82 insertions, 41 deletions
diff --git a/ranger/ext/utfwidth.py b/ranger/ext/utfwidth.py
index 762f3894..5c850607 100644
--- a/ranger/ext/utfwidth.py
+++ b/ranger/ext/utfwidth.py
@@ -18,56 +18,80 @@
 # ----
 # This file contains portions of code from cmus (uchar.c).
 
-try:
-	from sys import maxint
-except:
-	from sys import maxsize as maxint
+"""
+This module provides functions that operate with the width of characters
+and strings rather than characters or bytes.
+"""
+
+import sys
 
 NARROW = 1
 WIDE = 2
 
-def uwid(string, count=maxint):
-	"""Return the width of a string"""
-	try:
-		string = string.decode('utf8', 'replace')
-	except AttributeError:
-		pass
-	width = 0
-	for c in string:
-		width += utf_char_width(c)
-		count -= 1
-		if not count:
-			break
-	return width
+if sys.version > '3':
+	def uwid(string, count=-1):
+		"""Return the width of a string"""
+		width = 0
+		for c in string:
+			width += utf_char_width(c)
+			count -= 1
+			if not count:
+				break
+		return width
 
-def uchars(string):
-	"""Return a list with one string for each character"""
-	try:
-		string = string.decode('utf-8', 'replace')
-	except AttributeError:
-		pass
-	return list(string)
-	result = []
-	while i < end:
-		bytelen = utf_byte_length(string[i:])
-		result.append(string[i:i+bytelen])
-		i += bytelen
-	return result
+	def uchars(string):
+		"""Return a list with one string for each character"""
+		return list(string)
 
-def uwidslice(string, start=0, end=maxint):
-	chars = []
-	for c in uchars(string):
-		c_wid = utf_char_width(c)
-		if c_wid == NARROW:
-			chars.append(c)
-		elif c_wid == WIDE:
-			chars.append("")
-			chars.append(c)
-	return "".join(chars[start:end])
+	utf_ord = ord
+else:
+	def uwid(string, count=-1):
+		"""Return the width of a string"""
+		end = len(string)
+		i = 0
+		width = 0
+		while i < end and count:
+			bytelen = _utf_byte_length(string[i:])
+			width += utf_char_width(string[i:i+bytelen])
+			i += bytelen
+			count -= 1
+		return width
+
+	def uchars(string):
+		"""Return a list with one string for each character"""
+		end = len(string)
+		i = 0
+		result = []
+		while i < end:
+			bytelen = _utf_byte_length(string[i:])
+			result.append(string[i:i+bytelen])
+			i += bytelen
+		return result
+
+	def _utf_byte_length(string):
+		"""Return the byte length of one utf character"""
+		firstord = ord(string[0])
+		if firstord < 0b01111111:
+			return 1
+		if firstord < 0b10111111:
+			return 1  # invalid
+		if firstord < 0b11011111:
+			return 2
+		if firstord < 0b11101111:
+			return 3
+		if firstord < 0b11110100:
+			return 4
+		return 1  # invalid
+
+	def utf_ord(char):
+		value = 0
+		for byte in char:
+			value = (value << 6) | (ord(byte) & 0b00111111)
+		return value
 
 def utf_char_width(string):
 	"""Return the width of a single character"""
-	u = ord(string)
+	u = utf_ord(string)
 	if u < 0x1100:
 		return NARROW
 	# Hangul Jamo init. consonants
@@ -107,3 +131,20 @@ def utf_char_width(string):
 	if u >= 0x30000 and u <= 0x3FFFD:
 		return WIDE
 	return NARROW  # invalid (?)
+
+def uslice(string, start=0, end=1000000000):
+	"""
+	Returns a sliced string.
+
+	Works like string[start:end] except that one step represents
+	one narrow character in a monospaced character grid.
+	"""
+	chars = []
+	for c in uchars(string):
+		c_wid = utf_char_width(c)
+		if c_wid == NARROW:
+			chars.append(c)
+		elif c_wid == WIDE:
+			chars.append("")
+			chars.append(c)
+	return "".join(chars[start:end])