Skip to content

Commit

Permalink
perf(raw-study): improve INI file reading performance for RAW studies (
Browse files Browse the repository at this point in the history
…#1879)

Merge pull request #1879 from AntaresSimulatorTeam/feature/perf-ini-file-reading
  • Loading branch information
laurent-laporte-pro authored Jan 3, 2024
2 parents a93c62d + 1841c3d commit eb33e5c
Show file tree
Hide file tree
Showing 39 changed files with 516 additions and 330 deletions.
4 changes: 2 additions & 2 deletions antarest/launcher/adapters/slurm_launcher/slurm_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
from antarest.launcher.adapters.abstractlauncher import AbstractLauncher, LauncherCallbacks, LauncherInitException
from antarest.launcher.adapters.log_manager import LogTailManager
from antarest.launcher.model import JobStatus, LauncherParametersDTO, LogType, XpansionParametersDTO
from antarest.study.storage.rawstudy.io.reader import IniReader
from antarest.study.storage.rawstudy.io.writer.ini_writer import IniWriter
from antarest.study.storage.rawstudy.ini_reader import IniReader
from antarest.study.storage.rawstudy.ini_writer import IniWriter

logger = logging.getLogger(__name__)
logging.getLogger("paramiko").setLevel("WARN")
Expand Down
200 changes: 200 additions & 0 deletions antarest/study/storage/rawstudy/ini_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import math
import typing as t
from abc import ABC, abstractmethod
from pathlib import Path

from antarest.core.model import JSON, SUB_JSON


# Literal values which need a dedicated conversion (keys are lowercase).
# Infinity values are not supported by JSON, so we use a string instead.
_SPECIAL_VALUES: t.Dict[str, t.Union[str, bool]] = {
    "true": True,
    "false": False,
    "+inf": "+Inf",
    "-inf": "-Inf",
    "inf": "+Inf",
}


def convert_value(value: str) -> t.Union[str, int, float, bool]:
    """
    Convert a raw `.ini` value to the appropriate type for JSON.

    The conversions are tried in the following order:

    - "true" / "false" (case-insensitive) are converted to booleans,
    - "inf", "+inf", "-inf" (case-insensitive) are converted to the strings
      "+Inf" or "-Inf", because infinity is not supported by JSON,
    - integer literals are converted to `int`,
    - floating point literals are converted to `float`, except non-finite
      results: an infinite result (e.g. "infinity" or an overflowing literal)
      is converted to "+Inf" or "-Inf", and a NaN is kept as the raw string,
    - any other value is returned unchanged.

    Args:
        value: Raw string value read from the `.ini` file.

    Returns:
        The converted value, which is always JSON-serializable.
    """
    try:
        return _SPECIAL_VALUES[value.lower()]
    except KeyError:
        pass

    try:
        return int(value)
    except ValueError:
        pass

    try:
        number = float(value)
    except ValueError:
        return value

    # `float()` also accepts "infinity", "nan" and overflowing literals:
    # these results must not leak as floats since JSON cannot represent them.
    if math.isinf(number):
        return "+Inf" if number > 0 else "-Inf"
    if math.isnan(number):
        return value
    return number


def convert_obj(item: t.Any) -> SUB_JSON:
    """
    Recursively convert an object to a JSON-compatible structure.

    Dictionaries and lists are rebuilt with each of their values converted;
    any other item is handed over to `convert_value`.
    """
    if isinstance(item, dict):
        return {name: convert_obj(child) for name, child in item.items()}
    if isinstance(item, list):
        return [convert_obj(element) for element in item]
    return convert_value(item)


class IReader(ABC):
    """
    Interface of the `.ini` file readers.
    """

    @abstractmethod
    def read(self, path: t.Any) -> JSON:
        """
        Parse a `.ini` file and return its content as a JSON-compatible object.

        Args:
            path: Path to the `.ini` file, or a file-like object.

        Returns:
            Dictionary with the parsed content of the `.ini` file,
            which can be converted to JSON.
        """
        raise NotImplementedError


class IniReader(IReader):
    """
    Custom `.ini` reader for `.ini` files which have duplicate keys in a section.

    This class is required to parse `settings/generaldata.ini` files which
    have duplicate keys like "playlist_year_weight", "playlist_year +", "playlist_year -",
    "select_var -", "select_var +", in the `[playlist]` section.

    For instance::

        [playlist]
        playlist_reset = false
        playlist_year + = 6
        playlist_year + = 8
        playlist_year + = 13

    It is also required to parse `input/areas/sets.ini` files which have keys like "+" or "-".

    For instance::

        [all areas]
        caption = All areas
        comments = Spatial aggregates on all areas
        + = east
        + = west

    This class is not compatible with standard `.ini` readers.
    """

    def __init__(self, special_keys: t.Sequence[str] = (), section_name: str = "settings") -> None:
        """
        Initialize the reader.

        Args:
            special_keys: Keys whose values are collected in a list (duplicate keys).
            section_name: Section name to use for the keys found before any section header.
        """
        super().__init__()

        # List of keys which should be parsed as list.
        self._special_keys = set(special_keys)

        # Default section name to use if `.ini` file has no section.
        self._section_name = section_name

    def __repr__(self) -> str:  # pragma: no cover
        """Return a string representation of the object."""
        cls = self.__class__.__name__
        # use getattr() to make sure that the attributes are defined
        special_keys = tuple(getattr(self, "_special_keys", ()))
        section_name = getattr(self, "_section_name", "settings")
        return f"{cls}(special_keys={special_keys!r}, section_name={section_name!r})"

    def read(self, path: t.Any) -> JSON:
        """
        Parse a `.ini` file and convert its values to JSON-compatible types.

        Args:
            path: Path to the `.ini` file (`str` or `Path`), or a file-like object.
                A file-like object is used as a context manager, so it is
                closed once it has been read.

        Returns:
            Dictionary of sections with converted values.
            An empty dictionary is returned if the file does not exist.

        Raises:
            TypeError: if `path` is neither a path nor a file-like object.
            ValueError: if a line is neither a comment, a section header nor a key/value pair.
        """
        if isinstance(path, (Path, str)):
            try:
                with open(path, mode="r", encoding="utf-8") as f:
                    sections = self._parse_ini_file(f)
            except UnicodeDecodeError:
                # On windows, `.ini` files may use "cp1252" encoding
                with open(path, mode="r", encoding="cp1252") as f:
                    sections = self._parse_ini_file(f)
            except FileNotFoundError:
                # If the file is missing, an empty dictionary is returned.
                # This is required to mimic the behavior of `configparser.ConfigParser`.
                return {}

        elif hasattr(path, "read"):
            with path:
                sections = self._parse_ini_file(path)

        else:  # pragma: no cover
            raise TypeError(repr(type(path)))

        return t.cast(JSON, convert_obj(sections))

    def _parse_ini_file(self, ini_file: t.TextIO) -> JSON:
        """
        Parse `.ini` file to JSON object.

        The following parsing rules are applied:

        - If the file has no section, then the default section name is used.
          This case is required to parse Xpansion `user/expansion/settings.ini` files
          (using `SimpleKeyValueReader` subclass).

        - If the file has duplicate sections, then the values are merged.
          This case is required when the end-user produced an ill-formed `.ini` file.
          This ensures the parsing is robust even if some values may be lost.

        - If a section has duplicate keys, then the values are merged.
          This case is required, for instance, to parse `settings/generaldata.ini` files which
          have duplicate keys like "playlist_year_weight", "playlist_year +", "playlist_year -",
          "select_var -", "select_var +", in the `[playlist]` section.
          In this case, duplicate keys must be declared in the `special_keys` argument,
          to parse them as list.

        - If a section has no key, then an empty dictionary is returned.
          This case is required to parse `input/hydro/prepro/correlation.ini` files.

        - If a section name has square brackets, then they are preserved.
          This case is required to parse `input/hydro/allocation/{area-id}.ini` files.

        Args:
            ini_file: file or file-like object.

        Returns:
            Dictionary of parsed `.ini` file which can be converted to JSON.

        Raises:
            ValueError: if a line is neither blank, a comment, a section header
                nor a key/value pair.
        """
        # NOTE: This algorithm is 1.93x faster than configparser.ConfigParser
        sections: t.Dict[str, t.Dict[str, t.Any]] = {}
        section_name = self._section_name

        for line in ini_file:
            line = line.strip()
            if not line or line.startswith((";", "#")):
                # Blank lines and comments are ignored.
                continue
            elif line.startswith("["):
                # Only the outermost brackets are removed, so inner brackets are preserved.
                # The closing bracket is removed only when it is present, to avoid
                # truncating the last character of an ill-formed header.
                section_name = line[1:-1] if line.endswith("]") else line[1:]
                sections.setdefault(section_name, {})
            elif "=" in line:
                # Split on the first "=" only: the value itself may contain "=".
                key, value = map(str.strip, line.split("=", 1))
                section = sections.setdefault(section_name, {})
                if key in self._special_keys:
                    section.setdefault(key, []).append(value)
                else:
                    section[key] = value
            else:
                raise ValueError(f"☠☠☠ Invalid line: {line!r}")

        return sections


class SimpleKeyValueReader(IniReader):
    """
    Simple INI reader for "settings.ini" file which has no section.
    """

    def read(self, path: t.Any) -> JSON:
        """
        Parse `.ini` file which has no section to JSON object.

        This class is required to parse Xpansion `user/expansion/settings.ini` files.

        Args:
            path: Path to `.ini` file or file-like object.

        Returns:
            Dictionary of parsed key/value pairs.
            An empty dictionary is returned if the file is missing
            or if it contains no key/value pair.
        """
        sections = super().read(path)
        obj = t.cast(t.Mapping[str, JSON], sections)
        # The parent reader returns an empty dictionary when the file is missing
        # or has no key/value line: the default section is then absent, and
        # an empty result is returned instead of raising `KeyError`.
        return obj.get(self._section_name, {})
Original file line number Diff line number Diff line change
@@ -1,30 +1,20 @@
import ast
from configparser import RawConfigParser
import configparser
import typing as t
from pathlib import Path
from typing import Any, List, Optional

from antarest.core.model import JSON
from antarest.study.storage.rawstudy.io.reader import IniReader


class IniConfigParser(RawConfigParser):
def __init__(self, special_keys: Optional[List[str]] = None) -> None:
class IniConfigParser(configparser.RawConfigParser):
def __init__(self, special_keys: t.Optional[t.List[str]] = None) -> None:
super().__init__()
self.special_keys = special_keys

# noinspection SpellCheckingInspection
def optionxform(self, optionstr: str) -> str:
return optionstr

@staticmethod
def format_value(value: Any) -> Any:
parsed_value = IniReader.parse_value(value)
if isinstance(parsed_value, bool):
return str(parsed_value).lower()
elif isinstance(parsed_value, float):
return "%.6f" % parsed_value
return value

def _write_line( # type:ignore
self,
delimiter,
Expand Down Expand Up @@ -66,7 +56,7 @@ class IniWriter:
Standard INI writer.
"""

def __init__(self, special_keys: Optional[List[str]] = None):
def __init__(self, special_keys: t.Optional[t.List[str]] = None):
self.special_keys = special_keys

def write(self, data: JSON, path: Path) -> None:
Expand Down
Empty file.
3 changes: 0 additions & 3 deletions antarest/study/storage/rawstudy/io/reader/__init__.py

This file was deleted.

Loading

0 comments on commit eb33e5c

Please sign in to comment.