Lib/_pyrepl/utils.py - platform/external/python/cpython3 - Git at Google

 import re
 import unicodedata
 import functools

 from .types import CharBuffer, CharWidths
 from .trace import trace

 ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
 ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
 ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})


 @functools.cache
 def str_width(c: str) -> int:
     if ord(c) < 128:
         return 1
     # gh-139246 for zero-width joiner and combining characters
     if unicodedata.combining(c):
         return 0
     category = unicodedata.category(c)
     if category == "Cf" and c != "\u00ad":
         return 0
     w = unicodedata.east_asian_width(c)
     if w in ("N", "Na", "H", "A"):
         return 1
     return 2


 def wlen(s: str) -> int:
     if len(s) == 1 and s != "\x1a":
         return str_width(s)
     length = sum(str_width(i) for i in s)
     # remove lengths of any escape sequences
     sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
     ctrl_z_cnt = s.count("\x1a")
     return length - sum(len(i) for i in sequence) + ctrl_z_cnt


 def unbracket(s: str, including_content: bool = False) -> str:
     r"""Return `s` with \001 and \002 characters removed.

     If `including_content` is True, content between \001 and \002 is also
     stripped.
     """
     if including_content:
         return ZERO_WIDTH_BRACKET.sub("", s)
     return s.translate(ZERO_WIDTH_TRANS)


 def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]:
     r"""Decompose the input buffer into a printable variant.

     Returns a tuple of two lists:
     - the first list is the input buffer, character by character;
     - the second list is the visible width of each character in the input
       buffer.

     Examples:
     >>> utils.disp_str("a = 9")
     (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1])
     """
     chars: CharBuffer = []
     char_widths: CharWidths = []

     if not buffer:
         return chars, char_widths

     for c in buffer:
         if c == "\x1a":  # CTRL-Z on Windows
             chars.append(c)
             char_widths.append(2)
         elif ord(c) < 128:
             chars.append(c)
             char_widths.append(1)
         elif unicodedata.category(c).startswith("C"):
             c = r"\u%04x" % ord(c)
             chars.append(c)
             char_widths.append(len(c))
         else:
             chars.append(c)
             char_widths.append(str_width(c))
     trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths)
     return chars, char_widths
	import re
	import unicodedata
	import functools

	from .types import CharBuffer, CharWidths
	from .trace import trace

	ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
	ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
	ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})


	@functools.cache
	def str_width(c: str) -> int:
	if ord(c) < 128:
	return 1
	# gh-139246 for zero-width joiner and combining characters
	if unicodedata.combining(c):
	return 0
	category = unicodedata.category(c)
	if category == "Cf" and c != "\u00ad":
	return 0
	w = unicodedata.east_asian_width(c)
	if w in ("N", "Na", "H", "A"):
	return 1
	return 2


	def wlen(s: str) -> int:
	if len(s) == 1 and s != "\x1a":
	return str_width(s)
	length = sum(str_width(i) for i in s)
	# remove lengths of any escape sequences
	sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
	ctrl_z_cnt = s.count("\x1a")
	return length - sum(len(i) for i in sequence) + ctrl_z_cnt


	def unbracket(s: str, including_content: bool = False) -> str:
	r"""Return `s` with \001 and \002 characters removed.

	If `including_content` is True, content between \001 and \002 is also
	stripped.
	"""
	if including_content:
	return ZERO_WIDTH_BRACKET.sub("", s)
	return s.translate(ZERO_WIDTH_TRANS)


	def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]:
	r"""Decompose the input buffer into a printable variant.

	Returns a tuple of two lists:
	- the first list is the input buffer, character by character;
	- the second list is the visible width of each character in the input
	buffer.

	Examples:
	>>> utils.disp_str("a = 9")
	(['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1])
	"""
	chars: CharBuffer = []
	char_widths: CharWidths = []

	if not buffer:
	return chars, char_widths

	for c in buffer:
	if c == "\x1a": # CTRL-Z on Windows
	chars.append(c)
	char_widths.append(2)
	elif ord(c) < 128:
	chars.append(c)
	char_widths.append(1)
	elif unicodedata.category(c).startswith("C"):
	c = r"\u%04x" % ord(c)
	chars.append(c)
	char_widths.append(len(c))
	else:
	chars.append(c)
	char_widths.append(str_width(c))
	trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths)
	return chars, char_widths