feat(log_parser): use regex to implement log parsing

AntaresSimulatorTeam · Aug 1, 2023 · 6f5a2b1 · 6f5a2b1
1 parent 14e85a8
commit 6f5a2b1
Show file tree

Hide file tree

Showing 4 changed files with 136 additions and 61 deletions.
diff --git a/antarest/launcher/adapters/abstractlauncher.py b/antarest/launcher/adapters/abstractlauncher.py
@@ -102,10 +102,7 @@ def update_log(log_line: str) -> None:
             launch_progress_dto = LaunchProgressDTO.parse_obj(
                 launch_progress_json
             )
-            progress_updated = False
-            for line in log_line.split("\n"):
-                progress_updated |= launch_progress_dto.update_progress(line)
-            if progress_updated:
+            if launch_progress_dto.parse_log_lines(log_line.splitlines()):
                 self.event_bus.push(
                     Event(
                         type=EventType.LAUNCH_PROGRESS,

diff --git a/antarest/launcher/adapters/log_parser.py b/antarest/launcher/adapters/log_parser.py
@@ -1,34 +1,96 @@
-import logging
+import functools
 import re
+import typing as t
 
 from pydantic import BaseModel
 
-logger = logging.getLogger(__name__)
+_SearchFunc = t.Callable[[str], t.Optional[t.Match[str]]]
+
+_compile = functools.partial(re.compile, flags=re.IGNORECASE | re.VERBOSE)
+
+# Search for the line indicating the loading of areas (first line of data loading).
+_loading_areas = t.cast(
+    _SearchFunc,
+    _compile(r"Loading \s+ the \s+ list \s+ of \s+ areas").search,
+)
+
+# Search for the total number of Monte Carlo (MC) years.
+_total_mc_years = t.cast(
+    _SearchFunc,
+    _compile(
+        r"""
+        MC-Years \s* : \s*
+        \[ \d+ \s* \.{2,3} \s*  \d+ ], \s* total \s* : \s*
+        (?P<total_mc_years> \d+)
+        """
+    ).search,
+)
+
+# Search for the line indicating the export of annual results of a Monte Carlo year.
+_annual_results = t.cast(
+    _SearchFunc,
+    _compile(r"Exporting \s+ the \s+ annual \s+ results").search,
+)
+
+# Search for the line indicating the export of survey results.
+_survey_results = t.cast(
+    _SearchFunc,
+    _compile(r"Exporting \s+ the \s+ survey \s+ results").search,
+)
+
+# Search for the line indicating the solver is quitting gracefully.
+_quitting = t.cast(
+    _SearchFunc,
+    _compile(r"Quitting \s+ the \s+ solver \s+ gracefully").search,
+)
 
 
 class LaunchProgressDTO(BaseModel):
+    """
+    Measure the progress of a study simulation.
+
+    The progress percentage is calculated based on the number of Monte Carlo
+    years completed relative to the total number of years.
+
+    Attributes:
+        progress:
+            The percentage of completion for the simulation, ranging from 0 to 100.
+        total_mc_years:
+            The total number of Monte Carlo years for the simulation.
+    """
+
     progress: float = 0
     total_mc_years: int = 1
 
-    def update_progress(self, line: str) -> bool:
-        if "MC-Years : [" in line:
-            if regex_result := re.search(
-                r"MC-Years : \[\d+ .. \d+], total: (\d+)", line
-            ):
-                self.total_mc_years = int(regex_result[1])
-                return True
-            else:
-                logger.warning(
-                    f"Failed to extract log progress batch size on line : {line}"
-                )
-                return False
-        elif "Exporting the annual results" in line:
-            self.progress += 98 / self.total_mc_years
+    def _update_progress(self, line: str) -> bool:
+        """Updates the progress based on the given log line."""
+        if _loading_areas(line):
+            self.progress = 1.0
+            return True
+        if mo := _total_mc_years(line):
+            self.progress = 2.0
+            self.total_mc_years = int(mo["total_mc_years"])
+            return True
+        if _annual_results(line):
+            self.progress += 96 / self.total_mc_years
             return True
-        elif "Exporting the survey results" in line:
-            self.progress = 99
+        if _survey_results(line):
+            self.progress = 99.0
             return True
-        elif "Quitting the solver gracefully" in line:
-            self.progress = 100
+        if _quitting(line):
+            self.progress = 100.0
             return True
         return False
+
+    def parse_log_lines(self, lines: t.Iterable[str]) -> bool:
+        """
+        Parses a sequence of log lines and updates the progress accordingly.
+
+        Args:
+            lines (Iterable[str]): An iterable containing log lines to be parsed.
+
+        Returns:
+            bool: `True` if progress was updated at least once during the parsing,
+                  `False` otherwise.
+        """
+        return bool(sum(self._update_progress(line) for line in lines))
diff --git a/tests/launcher/test_log_parser.py b/tests/launcher/test_log_parser.py
@@ -1,76 +1,91 @@
 import pytest
 
 from antarest.launcher.adapters.log_parser import LaunchProgressDTO
-from tests.storage.integration.data import simulation_log
+from tests.storage.integration.data.simulation_log import SIMULATION_LOG
 
 
 @pytest.mark.parametrize(
-    "launch_progress_dto,line,expected_progression,expected_output",
+    "launch_progress_dto, line, expected_progress, expected_result",
     [
+        (
+            LaunchProgressDTO(total_mc_years=100),
+            "[infos] Loading the list of areas...",
+            1.0,
+            True,
+        ),
         (
             LaunchProgressDTO(total_mc_years=100),
             "[infos] MC-Years : [1 .. 11], total: 11",
-            0,
+            2.0,
             True,
         ),
         (
             LaunchProgressDTO(total_mc_years=10),
             "this is a test",
-            0,
+            0.0,
             False,
         ),
         (
             LaunchProgressDTO(total_mc_years=100),
             "[solver][infos] parallel batch size : 10",
-            0,
+            0.0,
             False,
         ),
         (
             LaunchProgressDTO(total_mc_years=10),
             "[solver][infos] Exporting the annual results",
-            9.8,
+            9.6,
             True,
         ),
         (
             LaunchProgressDTO(total_mc_years=10),
             "[solver][infos] Exporting the survey results",
-            99,
+            99.0,
+            True,
+        ),
+        (
+            LaunchProgressDTO(total_mc_years=10),
+            "[infos] [UI] Quitting the solver gracefully",
+            100.0,
             True,
         ),
     ],
 )
-def test_update_progress(
+def test_parse_log_lines(
     launch_progress_dto: LaunchProgressDTO,
     line: str,
-    expected_progression: float,
-    expected_output: bool,
+    expected_progress: float,
+    expected_result: bool,
 ):
-    output = launch_progress_dto.update_progress(line)
-    assert launch_progress_dto.progress == expected_progression
-    assert output == expected_output
+    output = launch_progress_dto.parse_log_lines([line])
+    assert launch_progress_dto.progress == expected_progress
+    assert output == expected_result
+
+
+class MyLaunchProgressDTO(LaunchProgressDTO):
+    update_history = []
+
+    def _update_progress(self, line: str) -> bool:
+        update = super()._update_progress(line)
+        if update:
+            self.update_history.append((line, self.progress))
+        return update
 
 
-def test_update_progress_with_real_log():
-    real_log = simulation_log.simulation_log.split("\n")
-    dto = LaunchProgressDTO()
-    for line in real_log:
-        if "Exporting the annual results" in line:
-            pre_update_progress = dto.progress
-            dto.update_progress(line)
-            assert (
-                dto.progress == pre_update_progress + 98 / dto.total_mc_years
-            )
-            continue
-        elif "Exporting the survey results" in line:
-            pre_update_progress = dto.progress
-            assert pre_update_progress < 99
-            dto.update_progress(line)
-            assert dto.progress == 99
-            continue
-        elif "Quitting the solver gracefully" in line:
-            assert dto.progress == 99
-            dto.update_progress(line)
-            assert dto.progress == 100
-            continue
-        dto.update_progress(line)
+def test_parse_log_lines__with_real_log():
+    dto = MyLaunchProgressDTO()
+    updated = dto.parse_log_lines(SIMULATION_LOG.splitlines())
+    assert updated
+    assert dto.progress == 100
     assert dto.total_mc_years == 2
+    # fmt: off
+    expected = [
+        ("[Wed Oct 14 14:25:04 2020][solver][infos] Loading the list of areas...", 1.0),
+        ("[Wed Oct 14 14:25:05 2020][solver][infos] MC-Years : [1 .. 2], total: 2", 2.0),
+        ("[Wed Oct 14 14:25:05 2020][solver][infos] Exporting the annual results", 50.0),
+        ("[Wed Oct 14 14:25:05 2020][solver][infos] Exporting the annual results", 98.0),
+        ("[Wed Oct 14 14:25:05 2020][solver][check] Exporting the survey results...", 99.0),
+        ("[Wed Oct 14 14:25:05 2020][solver][infos] [UI] Quitting the solver " "gracefully", 100.0),
+    ]
+    # fmt: on
+    assert dto.update_history == expected
diff --git a/tests/storage/integration/data/simulation_log.py b/tests/storage/integration/data/simulation_log.py
@@ -1,4 +1,5 @@
-simulation_log = """[Wed Oct 14 14:25:04 2020][solver][check] Antares Solver v7.0.0 (RTE France)
+SIMULATION_LOG = """\
+[Wed Oct 14 14:25:04 2020][solver][check] Antares Solver v7.0.0 (RTE France)
 [Wed Oct 14 14:25:04 2020][solver][infos]   :: built for 64-bit architectures, Microsoft Windows, 8 cpu(s)
 [Wed Oct 14 14:25:04 2020][solver][infos]   :: hostname = GROESNWP7
 [Wed Oct 14 14:25:04 2020][solver][infos]