Skip to content

Commit

Permalink
feat(log_parser): use regex to implement log parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
laurent-laporte-pro committed Aug 1, 2023
1 parent 14e85a8 commit 6f5a2b1
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 61 deletions.
5 changes: 1 addition & 4 deletions antarest/launcher/adapters/abstractlauncher.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,7 @@ def update_log(log_line: str) -> None:
launch_progress_dto = LaunchProgressDTO.parse_obj(
launch_progress_json
)
progress_updated = False
for line in log_line.split("\n"):
progress_updated |= launch_progress_dto.update_progress(line)
if progress_updated:
if launch_progress_dto.parse_log_lines(log_line.splitlines()):
self.event_bus.push(
Event(
type=EventType.LAUNCH_PROGRESS,
Expand Down
102 changes: 82 additions & 20 deletions antarest/launcher/adapters/log_parser.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,96 @@
import logging
import functools
import re
import typing as t

from pydantic import BaseModel

# Logger dedicated to this module.
logger = logging.getLogger(__name__)

# Common signature of the matchers defined below: they receive one log line
# and return a match object, or `None` when the line does not match.
_SearchFunc = t.Callable[[str], t.Optional[t.Match[str]]]

# Patterns are case-insensitive and use the verbose syntax, so the blanks
# inside them are purely cosmetic: real whitespace is matched with `\s`.
_compile = functools.partial(re.compile, flags=re.IGNORECASE | re.VERBOSE)


def _search_func(pattern: str) -> _SearchFunc:
    """Compile `pattern` and return the bound `search` method of the result."""
    return t.cast(_SearchFunc, _compile(pattern).search)


# First line of the data loading stage: the list of areas is being loaded.
_loading_areas = _search_func(r"Loading \s+ the \s+ list \s+ of \s+ areas")

# Line giving the range and the total number of Monte-Carlo (MC) years;
# the total is captured in the `total_mc_years` group.
_total_mc_years = _search_func(
    r"""
MC-Years \s* : \s*
\[ \d+ \s* \.{2,3} \s* \d+ ], \s* total \s* : \s*
(?P<total_mc_years> \d+)
"""
)

# Line written when the annual results of one Monte-Carlo year are exported.
_annual_results = _search_func(r"Exporting \s+ the \s+ annual \s+ results")

# Line written when the survey results are exported.
_survey_results = _search_func(r"Exporting \s+ the \s+ survey \s+ results")

# Line written when the solver quits gracefully.
_quitting = _search_func(r"Quitting \s+ the \s+ solver \s+ gracefully")


class LaunchProgressDTO(BaseModel):
    """
    Measure the progress of a study simulation.

    The progress percentage is driven by milestones found in the solver log:

    - 1 %: the list of areas is being loaded,
    - 2 %: the total number of Monte-Carlo years is known,
    - 2 % to 98 %: each exported annual result adds `96 / total_mc_years`,
    - 99 %: the survey results are exported,
    - 100 %: the solver quits gracefully.

    Attributes:
        progress:
            The percentage of completion for the simulation, ranging from 0 to 100.
        total_mc_years:
            The total number of Monte-Carlo years for the simulation.
    """

    progress: float = 0
    total_mc_years: int = 1

    def _update_progress(self, line: str) -> bool:
        """
        Update the progress based on the given log line.

        Args:
            line: A single line of the solver log.

        Returns:
            `True` if the line matched one of the known milestones,
            `False` otherwise.
        """
        if _loading_areas(line):
            self.progress = 1.0
            return True
        if mo := _total_mc_years(line):
            self.progress = 2.0
            self.total_mc_years = int(mo["total_mc_years"])
            return True
        if _annual_results(line):
            # Guard against a total of 0, which would raise ZeroDivisionError.
            step = 96 / max(self.total_mc_years, 1)
            # Annual results fill the 2 % - 98 % band only: never overshoot it,
            # even if more "annual results" lines are seen than expected.
            self.progress = min(self.progress + step, 98.0)
            return True
        if _survey_results(line):
            self.progress = 99.0
            return True
        if _quitting(line):
            self.progress = 100.0
            return True
        return False

    def update_progress(self, line: str) -> bool:
        """
        Update the progress based on the given log line.

        Public wrapper around `_update_progress`, kept so that callers using
        this earlier method name keep working.
        """
        return self._update_progress(line)

    def parse_log_lines(self, lines: t.Iterable[str]) -> bool:
        """
        Parse a sequence of log lines and update the progress accordingly.

        Args:
            lines: An iterable containing the log lines to be parsed.

        Returns:
            `True` if progress was updated at least once during the parsing,
            `False` otherwise.
        """
        # Build the full list first: every line must be processed, so `any`
        # must not short-circuit on the first line that updates the progress.
        return any([self._update_progress(line) for line in lines])
87 changes: 51 additions & 36 deletions tests/launcher/test_log_parser.py
Original file line number Diff line number Diff line change
@@ -1,76 +1,91 @@
import pytest

from antarest.launcher.adapters.log_parser import LaunchProgressDTO
from tests.storage.integration.data import simulation_log
from tests.storage.integration.data.simulation_log import SIMULATION_LOG


# Each case: a fresh DTO, one log line, the progress expected after parsing
# that line, and whether the line is expected to update the progress.
@pytest.mark.parametrize(
    "launch_progress_dto, line, expected_progress, expected_result",
    [
        (
            LaunchProgressDTO(total_mc_years=100),
            "[infos] Loading the list of areas...",
            1.0,
            True,
        ),
        (
            LaunchProgressDTO(total_mc_years=100),
            "[infos] MC-Years : [1 .. 11], total: 11",
            2.0,
            True,
        ),
        (
            LaunchProgressDTO(total_mc_years=10),
            "this is a test",
            0.0,
            False,
        ),
        (
            LaunchProgressDTO(total_mc_years=100),
            "[solver][infos] parallel batch size : 10",
            0.0,
            False,
        ),
        (
            LaunchProgressDTO(total_mc_years=10),
            "[solver][infos] Exporting the annual results",
            9.6,
            True,
        ),
        (
            LaunchProgressDTO(total_mc_years=10),
            "[solver][infos] Exporting the survey results",
            99.0,
            True,
        ),
        (
            LaunchProgressDTO(total_mc_years=10),
            "[infos] [UI] Quitting the solver gracefully",
            100.0,
            True,
        ),
    ],
)
def test_parse_log_lines(
    launch_progress_dto: LaunchProgressDTO,
    line: str,
    expected_progress: float,
    expected_result: bool,
) -> None:
    """Check the progress and the returned flag after parsing a single log line."""
    output = launch_progress_dto.parse_log_lines([line])
    # The progress is a float computed by division: compare with a tolerance.
    assert launch_progress_dto.progress == pytest.approx(expected_progress)
    assert output == expected_result


class MyLaunchProgressDTO(LaunchProgressDTO):
    """Test double recording every log line that updated the progress."""

    # Pairs of (log line, progress value right after that line was handled).
    update_history = []

    def _update_progress(self, line: str) -> bool:
        """Delegate to the parent class and record the line when it matched."""
        update = super()._update_progress(line)
        if update:
            self.update_history.append((line, self.progress))
        return update


def test_parse_log_lines__with_real_log():
    """Parse a complete solver log and check every recorded progress update."""
    dto = MyLaunchProgressDTO()
    updated = dto.parse_log_lines(SIMULATION_LOG.splitlines())
    assert updated
    assert dto.progress == 100
    assert dto.total_mc_years == 2
    # fmt: off
    expected = [
        ("[Wed Oct 14 14:25:04 2020][solver][infos] Loading the list of areas...", 1.0),
        ("[Wed Oct 14 14:25:05 2020][solver][infos] MC-Years : [1 .. 2], total: 2", 2.0),
        ("[Wed Oct 14 14:25:05 2020][solver][infos] Exporting the annual results", 50.0),
        ("[Wed Oct 14 14:25:05 2020][solver][infos] Exporting the annual results", 98.0),
        ("[Wed Oct 14 14:25:05 2020][solver][check] Exporting the survey results...", 99.0),
        ("[Wed Oct 14 14:25:05 2020][solver][infos] [UI] Quitting the solver gracefully", 100.0),
    ]
    # fmt: on
    assert dto.update_history == expected
3 changes: 2 additions & 1 deletion tests/storage/integration/data/simulation_log.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
simulation_log = """[Wed Oct 14 14:25:04 2020][solver][check] Antares Solver v7.0.0 (RTE France)
SIMULATION_LOG = """\
[Wed Oct 14 14:25:04 2020][solver][check] Antares Solver v7.0.0 (RTE France)
[Wed Oct 14 14:25:04 2020][solver][infos] :: built for 64-bit architectures, Microsoft Windows, 8 cpu(s)
[Wed Oct 14 14:25:04 2020][solver][infos] :: hostname = GROESNWP7
[Wed Oct 14 14:25:04 2020][solver][infos]
Expand Down

0 comments on commit 6f5a2b1

Please sign in to comment.