Reduced amount of style characters carried over from previous lines when aligning text.

kmvanbrunt · kmvanbrunt · commit e76dbad13cb6 · 2022-02-22T19:52:26.000-05:00
Also reduced amount of style characters appended to truncated text.
These changes were made to reduce memory usage in certain use cases of tables (e.g. nested colored tables).
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,11 +1,12 @@
-## 2.4.0 (TBD, 2021)
+## 2.4.0 (TBD, 2022)
 * Bug Fixes
   * Fixed issue in `ansi.async_alert_str()` which would raise `IndexError` if prompt was blank.
   * Fixed issue where tab completion was quoting argparse flags in some cases.
 * Enhancements
   * Added broader exception handling when enabling clipboard functionality via `pyperclip`.
   * Added `PassThroughException` to `__init__.py` imports.
   * cmd2 now uses pyreadline3 when running any version of Python on Windows
+  * Improved memory usage in certain use cases of tables (e.g. nested colored tables)
 * Deletions (potentially breaking changes)
   * Deleted `cmd2.fg` and `cmd2.bg` which were deprecated in 2.3.0. Use `cmd2.Fg` and `cmd2.Bg` instead.
 
diff --git a/cmd2/ansi.py b/cmd2/ansi.py
@@ -23,8 +23,9 @@
 #######################################################
 # Common ANSI escape sequence constants
 #######################################################
-CSI = '\033['
-OSC = '\033]'
+ESC = '\x1b'
+CSI = f'{ESC}['
+OSC = f'{ESC}]'
 BEL = '\a'
 
 
@@ -60,8 +61,26 @@ def __repr__(self) -> str:
 The default is ``AllowStyle.TERMINAL``.
 """
 
-# Regular expression to match ANSI style sequences (including 8-bit and 24-bit colors)
-ANSI_STYLE_RE = re.compile(r'\x1b\[[^m]*m')
+# Regular expression to match ANSI style sequences
+ANSI_STYLE_RE = re.compile(fr'{ESC}\[[^m]*m')
+
+# Matches standard foreground colors: CSI(30-37|90-97|39)m
+STD_FG_RE = re.compile(fr'{ESC}\[(?:[39][0-7]|39)m')
+
+# Matches standard background colors: CSI(40-47|100-107|49)m
+STD_BG_RE = re.compile(fr'{ESC}\[(?:(?:4|10)[0-7]|49)m')
+
+# Matches eight-bit foreground colors: CSI38;5;(0-255)m
+EIGHT_BIT_FG_RE = re.compile(fr'{ESC}\[38;5;(?:1?[0-9]?[0-9]?|2[0-4][0-9]|25[0-5])m')
+
+# Matches eight-bit background colors: CSI48;5;(0-255)m
+EIGHT_BIT_BG_RE = re.compile(fr'{ESC}\[48;5;(?:1?[0-9]?[0-9]?|2[0-4][0-9]|25[0-5])m')
+
+# Matches RGB foreground colors: CSI38;2;(0-255);(0-255);(0-255)m
+RGB_FG_RE = re.compile(fr'{ESC}\[38;2(?:;(?:1?[0-9]?[0-9]?|2[0-4][0-9]|25[0-5])){{3}}m')
+
+# Matches RGB background colors: CSI48;2;(0-255);(0-255);(0-255)m
+RGB_BG_RE = re.compile(fr'{ESC}\[48;2(?:;(?:1?[0-9]?[0-9]?|2[0-4][0-9]|25[0-5])){{3}}m')
 
 
 def strip_style(text: str) -> str:
@@ -240,6 +259,7 @@ class TextStyle(AnsiSequence, Enum):
 
     # Resets all styles and colors of text
     RESET_ALL = 0
+    ALT_RESET_ALL = ''
 
     INTENSITY_BOLD = 1
     INTENSITY_DIM = 2
@@ -606,7 +626,7 @@ def __str__(self) -> str:
         This is helpful when using an EightBitFg in an f-string or format() call
         e.g. my_str = f"{EightBitFg.SLATE_BLUE_1}hello{Fg.RESET}"
         """
-        return f"{CSI}{38};5;{self.value}m"
+        return f"{CSI}38;5;{self.value}m"
 
 
 class EightBitBg(BgColor, Enum):
@@ -879,7 +899,7 @@ def __str__(self) -> str:
         This is helpful when using an EightBitBg in an f-string or format() call
         e.g. my_str = f"{EightBitBg.KHAKI_3}hello{Bg.RESET}"
         """
-        return f"{CSI}{48};5;{self.value}m"
+        return f"{CSI}48;5;{self.value}m"
 
 
 class RgbFg(FgColor):
@@ -900,7 +920,7 @@ def __init__(self, r: int, g: int, b: int) -> None:
         if any(c < 0 or c > 255 for c in [r, g, b]):
             raise ValueError("RGB values must be integers in the range of 0 to 255")
 
-        self._sequence = f"{CSI}{38};2;{r};{g};{b}m"
+        self._sequence = f"{CSI}38;2;{r};{g};{b}m"
 
     def __str__(self) -> str:
         """
@@ -929,7 +949,7 @@ def __init__(self, r: int, g: int, b: int) -> None:
         if any(c < 0 or c > 255 for c in [r, g, b]):
             raise ValueError("RGB values must be integers in the range of 0 to 255")
 
-        self._sequence = f"{CSI}{48};2;{r};{g};{b}m"
+        self._sequence = f"{CSI}48;2;{r};{g};{b}m"
 
     def __str__(self) -> str:
         """
diff --git a/cmd2/decorators.py b/cmd2/decorators.py
@@ -238,13 +238,13 @@ def _set_parser_prog(parser: argparse.ArgumentParser, prog: str) -> None:
             break
 
 
-#: Function signature for an Command Function that uses an argparse.ArgumentParser to process user input
+#: Function signature for a Command Function that uses an argparse.ArgumentParser to process user input
 #: and optionally returns a boolean
 ArgparseCommandFuncOptionalBoolReturn = Union[
     Callable[['cmd2.Cmd', argparse.Namespace], Optional[bool]],
     Callable[[CommandSet, argparse.Namespace], Optional[bool]],
 ]
-#: Function signature for an Command Function that uses an argparse.ArgumentParser to process user input
+#: Function signature for a Command Function that uses an argparse.ArgumentParser to process user input
 #: and returns a boolean
 ArgparseCommandFuncBoolReturn = Union[
     Callable[['cmd2.Cmd', argparse.Namespace], bool],
diff --git a/cmd2/table_creator.py b/cmd2/table_creator.py
@@ -165,7 +165,7 @@ def _wrap_long_word(word: str, max_width: int, max_lines: Union[int, float], is_
         :param is_last_word: True if this is the last word of the total text being wrapped
         :return: Tuple(wrapped text, lines used, display width of last line)
         """
-        styles = utils.get_styles_in_text(word)
+        styles_dict = utils.get_styles_dict(word)
         wrapped_buf = io.StringIO()
 
         # How many lines we've used
@@ -190,9 +190,9 @@ def _wrap_long_word(word: str, max_width: int, max_lines: Union[int, float], is_
                 break
 
             # Check if we're at a style sequence. These don't count toward display width.
-            if char_index in styles:
-                wrapped_buf.write(styles[char_index])
-                char_index += len(styles[char_index])
+            if char_index in styles_dict:
+                wrapped_buf.write(styles_dict[char_index])
+                char_index += len(styles_dict[char_index])
                 continue
 
             cur_char = word[char_index]
@@ -330,7 +330,7 @@ def add_word(word_to_add: str, is_last_word: bool) -> None:
                 break
 
             # Locate the styles in this line
-            styles = utils.get_styles_in_text(data_line)
+            styles_dict = utils.get_styles_dict(data_line)
 
             # Display width of the current line we are building
             cur_line_width = 0
@@ -344,9 +344,9 @@ def add_word(word_to_add: str, is_last_word: bool) -> None:
                     break
 
                 # Check if we're at a style sequence. These don't count toward display width.
-                if char_index in styles:
-                    cur_word_buf.write(styles[char_index])
-                    char_index += len(styles[char_index])
+                if char_index in styles_dict:
+                    cur_word_buf.write(styles_dict[char_index])
+                    char_index += len(styles_dict[char_index])
                     continue
 
                 cur_char = data_line[char_index]
@@ -391,7 +391,7 @@ def _generate_cell_lines(self, cell_data: Any, is_header: bool, col: Column, fil
         :param col: Column definition for this cell
         :param fill_char: character that fills remaining space in a cell. If your text has a background color,
                           then give fill_char the same background color. (Cannot be a line breaking character)
-        :return: Tuple of cell lines deque and the display width of the cell
+        :return: Tuple(deque of cell lines, display width of the cell)
         """
         # Convert data to string and replace tabs with spaces
         data_str = str(cell_data).replace('\t', SPACE * self.tab_width)
@@ -411,8 +411,10 @@ def _generate_cell_lines(self, cell_data: Any, is_header: bool, col: Column, fil
 
         aligned_text = utils.align_text(wrapped_text, fill_char=fill_char, width=col.width, alignment=text_alignment)
 
-        lines = deque(aligned_text.splitlines())
+        # Calculate cell_width first to avoid having 2 copies of aligned_text.splitlines() in memory
         cell_width = ansi.widest_line(aligned_text)
+        lines = deque(aligned_text.splitlines())
+
         return lines, cell_width
 
     def generate_row(
diff --git a/cmd2/utils.py b/cmd2/utils.py
@@ -737,6 +737,87 @@ def __init__(
         self.saved_redirecting = saved_redirecting
 
 
+def _remove_overridden_styles(styles_to_parse: List[str]) -> List[str]:
+    """
+    Utility function for align_text() / truncate_line() which filters a style list down
+    to only those which would still be in effect if all were processed in order.
+
+    This is mainly used to reduce how many style strings are stored in memory when
+    building large multiline strings with ANSI styles. We only need to carry over
+    styles from previous lines that are still in effect.
+
+    :param styles_to_parse: list of styles to evaluate.
+    :return: list of styles that are still in effect.
+    """
+    from . import (
+        ansi,
+    )
+
+    class StyleState:
+        """Keeps track of what text styles are enabled"""
+
+        def __init__(self) -> None:
+            # Contains styles still in effect, keyed by their index in styles_to_parse
+            self.style_dict: Dict[int, str] = dict()
+
+            # Indexes into style_dict
+            self.reset_all: Optional[int] = None
+            self.fg: Optional[int] = None
+            self.bg: Optional[int] = None
+            self.intensity: Optional[int] = None
+            self.italic: Optional[int] = None
+            self.overline: Optional[int] = None
+            self.strikethrough: Optional[int] = None
+            self.underline: Optional[int] = None
+
+    # Read the previous styles in order and keep track of their states
+    style_state = StyleState()
+
+    for index, style in enumerate(styles_to_parse):
+        # For style types that we recognize, only keep their latest value from styles_to_parse.
+        # All unrecognized style types will be retained and their order preserved.
+        if style in (str(ansi.TextStyle.RESET_ALL), str(ansi.TextStyle.ALT_RESET_ALL)):
+            style_state = StyleState()
+            style_state.reset_all = index
+        elif ansi.STD_FG_RE.match(style) or ansi.EIGHT_BIT_FG_RE.match(style) or ansi.RGB_FG_RE.match(style):
+            if style_state.fg is not None:
+                style_state.style_dict.pop(style_state.fg)
+            style_state.fg = index
+        elif ansi.STD_BG_RE.match(style) or ansi.EIGHT_BIT_BG_RE.match(style) or ansi.RGB_BG_RE.match(style):
+            if style_state.bg is not None:
+                style_state.style_dict.pop(style_state.bg)
+            style_state.bg = index
+        elif style in (
+            str(ansi.TextStyle.INTENSITY_BOLD),
+            str(ansi.TextStyle.INTENSITY_DIM),
+            str(ansi.TextStyle.INTENSITY_NORMAL),
+        ):
+            if style_state.intensity is not None:
+                style_state.style_dict.pop(style_state.intensity)
+            style_state.intensity = index
+        elif style in (str(ansi.TextStyle.ITALIC_ENABLE), str(ansi.TextStyle.ITALIC_DISABLE)):
+            if style_state.italic is not None:
+                style_state.style_dict.pop(style_state.italic)
+            style_state.italic = index
+        elif style in (str(ansi.TextStyle.OVERLINE_ENABLE), str(ansi.TextStyle.OVERLINE_DISABLE)):
+            if style_state.overline is not None:
+                style_state.style_dict.pop(style_state.overline)
+            style_state.overline = index
+        elif style in (str(ansi.TextStyle.STRIKETHROUGH_ENABLE), str(ansi.TextStyle.STRIKETHROUGH_DISABLE)):
+            if style_state.strikethrough is not None:
+                style_state.style_dict.pop(style_state.strikethrough)
+            style_state.strikethrough = index
+        elif style in (str(ansi.TextStyle.UNDERLINE_ENABLE), str(ansi.TextStyle.UNDERLINE_DISABLE)):
+            if style_state.underline is not None:
+                style_state.style_dict.pop(style_state.underline)
+            style_state.underline = index
+
+        # Store this style and its location in the dictionary
+        style_state.style_dict[index] = style
+
+    return list(style_state.style_dict.values())
+
+
 class TextAlignment(Enum):
     """Horizontal text alignment"""
 
@@ -801,7 +882,7 @@ def align_text(
         raise (ValueError("Fill character is an unprintable character"))
 
     # Isolate the style chars before and after the fill character. We will use them when building sequences of
-    # of fill characters. Instead of repeating the style characters for each fill character, we'll wrap each sequence.
+    # fill characters. Instead of repeating the style characters for each fill character, we'll wrap each sequence.
     fill_char_style_begin, fill_char_style_end = fill_char.split(stripped_fill_char)
 
     if text:
@@ -811,10 +892,10 @@ def align_text(
 
     text_buf = io.StringIO()
 
-    # ANSI style sequences that may affect future lines will be cancelled by the fill_char's style.
-    # To avoid this, we save the state of a line's style so we can restore it when beginning the next line.
-    # This also allows the lines to be used independently and still have their style. TableCreator does this.
-    aggregate_styles = ''
+    # ANSI style sequences that may affect subsequent lines will be cancelled by the fill_char's style.
+    # To avoid this, we save styles which are still in effect so we can restore them when beginning the next line.
+    # This also allows lines to be used independently and still have their style. TableCreator does this.
+    previous_styles: List[str] = []
 
     for index, line in enumerate(lines):
         if index > 0:
@@ -827,8 +908,8 @@ def align_text(
         if line_width == -1:
             raise (ValueError("Text to align contains an unprintable character"))
 
-        # Get the styles in this line
-        line_styles = get_styles_in_text(line)
+        # Get list of styles in this line
+        line_styles = list(get_styles_dict(line).values())
 
         # Calculate how wide each side of filling needs to be
         if line_width >= width:
@@ -858,7 +939,7 @@ def align_text(
         right_fill += ' ' * (right_fill_width - ansi.style_aware_wcswidth(right_fill))
 
         # Don't allow styles in fill characters and text to affect one another
-        if fill_char_style_begin or fill_char_style_end or aggregate_styles or line_styles:
+        if fill_char_style_begin or fill_char_style_end or previous_styles or line_styles:
             if left_fill:
                 left_fill = ansi.TextStyle.RESET_ALL + fill_char_style_begin + left_fill + fill_char_style_end
             left_fill += ansi.TextStyle.RESET_ALL
@@ -867,11 +948,12 @@ def align_text(
                 right_fill = ansi.TextStyle.RESET_ALL + fill_char_style_begin + right_fill + fill_char_style_end
             right_fill += ansi.TextStyle.RESET_ALL
 
-        # Write the line and restore any styles from previous lines
-        text_buf.write(left_fill + aggregate_styles + line + right_fill)
+        # Write the line and restore styles from previous lines which are still in effect
+        text_buf.write(left_fill + ''.join(previous_styles) + line + right_fill)
 
-        # Update the aggregate with styles in this line
-        aggregate_styles += ''.join(line_styles.values())
+        # Update list of styles that are still in effect for the next line
+        previous_styles.extend(line_styles)
+        previous_styles = _remove_overridden_styles(previous_styles)
 
     return text_buf.getvalue()
 
@@ -985,7 +1067,7 @@ def truncate_line(line: str, max_width: int, *, tab_width: int = 4) -> str:
         return line
 
     # Find all style sequences in the line
-    styles = get_styles_in_text(line)
+    styles_dict = get_styles_dict(line)
 
     # Add characters one by one and preserve all style sequences
     done = False
@@ -995,10 +1077,10 @@ def truncate_line(line: str, max_width: int, *, tab_width: int = 4) -> str:
 
     while not done:
         # Check if a style sequence is at this index. These don't count toward display width.
-        if index in styles:
-            truncated_buf.write(styles[index])
-            style_len = len(styles[index])
-            styles.pop(index)
+        if index in styles_dict:
+            truncated_buf.write(styles_dict[index])
+            style_len = len(styles_dict[index])
+            styles_dict.pop(index)
             index += style_len
             continue
 
@@ -1015,13 +1097,16 @@ def truncate_line(line: str, max_width: int, *, tab_width: int = 4) -> str:
         truncated_buf.write(char)
         index += 1
 
-    # Append remaining style sequences from original string
-    truncated_buf.write(''.join(styles.values()))
+    # Filter out overridden styles from the remaining ones
+    remaining_styles = _remove_overridden_styles(list(styles_dict.values()))
+
+    # Append the remaining styles to the truncated text
+    truncated_buf.write(''.join(remaining_styles))
 
     return truncated_buf.getvalue()
 
 
-def get_styles_in_text(text: str) -> Dict[int, str]:
+def get_styles_dict(text: str) -> Dict[int, str]:
     """
     Return an OrderedDict containing all ANSI style sequences found in a string
 
diff --git a/docs/features/argument_processing.rst b/docs/features/argument_processing.rst
@@ -82,18 +82,13 @@ Here's what it looks like::
     to bugs in CPython prior to Python 3.7 which make it impossible to make a
     deep copy of an instance of a ``argparse.ArgumentParser``.
 
-    See the table_display_ example for a work-around that demonstrates how to
-    create a function which returns a unique instance of the parser you want.
-
 
 .. note::
 
    The ``@with_argparser`` decorator sets the ``prog`` variable in the argument
    parser based on the name of the method it is decorating. This will override
    anything you specify in ``prog`` variable when creating the argument parser.
 
-.. _table_display: https://github.com/python-cmd2/cmd2/blob/master/examples/table_display.py
-
 
 Help Messages
 -------------
diff --git a/tests/test_ansi.py b/tests/test_ansi.py
diff --git a/tests/test_utils.py b/tests/test_utils.py