diff --git a/ads/common/model.py b/ads/common/model.py
index bfee5384f..59d034f31 100644
--- a/ads/common/model.py
+++ b/ads/common/model.py
@@ -4,7 +4,6 @@
 # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
-from distutils import dir_util
 import os
 import shutil
 from collections.abc import Iterable
diff --git a/ads/opctl/operator/lowcode/forecast/operator_config.py b/ads/opctl/operator/lowcode/forecast/operator_config.py
index 23ec5b959..3709bb6f5 100644
--- a/ads/opctl/operator/lowcode/forecast/operator_config.py
+++ b/ads/opctl/operator/lowcode/forecast/operator_config.py
@@ -93,7 +93,82 @@ class Tuning(DataClassSerializable):
 
 @dataclass(repr=True)
 class ForecastOperatorSpec(DataClassSerializable):
-    """Class representing forecast operator specification."""
+    """
+    Class representing the forecast operator specification.
+
+    Attributes
+    ----------
+    name: str
+        The name of the forecast operator.
+    historical_data: InputData
+        The historical data to be used for forecasting.
+    additional_data: InputData
+        Additional data to be used for forecasting.
+    test_data: TestData
+        The test data to be used for evaluating the forecast.
+    output_directory: OutputDirectory
+        The directory where the output files will be saved.
+    report_filename: str
+        The name of the report file. Defaults to "report.html".
+    report_title: str
+        The title of the report.
+    report_theme: str
+        The theme of the report. Can be "light" or "dark". Defaults to "light".
+    metrics_filename: str
+        The name of the metrics file. Defaults to "metrics.csv".
+    test_metrics_filename: str
+        The name of the test metrics file. Defaults to "test_metrics.csv".
+    forecast_filename: str
+        The name of the forecast file. Defaults to "forecast.csv".
+    global_explanation_filename: str
+        The name of the global explanation file. Defaults to "global_explanation.csv".
+    local_explanation_filename: str
+        The name of the local explanation file. Defaults to "local_explanation.csv".
+    target_column: str
+        The name of the target column.
+    preprocessing: DataPreprocessor
+        The data preprocessing settings.
+    datetime_column: DateTimeColumn
+        The datetime column details.
+    target_category_columns: List[str]
+        The list of target category columns.
+    generate_report: bool
+        Whether to generate a report. Defaults to True.
+    generate_forecast_file: bool
+        Whether to generate a forecast file. Defaults to True.
+    generate_metrics: bool
+        Whether to generate metrics. Defaults to True.
+    generate_metrics_file: bool
+        Whether to generate a metrics file. Defaults to True.
+    generate_explanations: bool
+        Whether to generate explanations. Defaults to False.
+    generate_explanation_files: bool
+        Whether to generate explanation files. Defaults to True.
+    explanations_accuracy_mode: str
+        The accuracy mode for explanations. Can be "HIGH_ACCURACY", "BALANCED", "FAST_APPROXIMATE", or "AUTOMLX".
+    horizon: int
+        The forecast horizon.
+    model: str
+        The forecasting model to be used.
+    model_kwargs: Dict
+        The keyword arguments for the model.
+    model_parameters: str
+        The model parameters.
+    previous_output_dir: str
+        The output directory of a previous run to be reused for forecasting.
+    generate_model_parameters: bool
+        Whether to generate model parameters. Defaults to False.
+    generate_model_pickle: bool
+        Whether to generate a model pickle. Defaults to False.
+    confidence_interval_width: float
+        The width of the confidence interval. Defaults to 0.80.
+    metric: str
+        The metric to be used for evaluation.
+    tuning: Tuning
+        The tuning settings.
+    what_if_analysis: WhatIfAnalysis
+        The what-if analysis settings.
+    """
 
     name: str = None
     historical_data: InputData = field(default_factory=InputData)
diff --git a/tests/operators/forecast/test_api_options.py b/tests/operators/forecast/test_api_options.py
new file mode 100644
index 000000000..ea092fd0e
--- /dev/null
+++ b/tests/operators/forecast/test_api_options.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+import tempfile
+import pandas as pd
+import pytest
+from copy import deepcopy
+from ads.opctl.operator.lowcode.forecast.__main__ import operate
+from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorConfig
+
+DATASET_PREFIX = f"{os.path.dirname(os.path.abspath(__file__))}/../data/timeseries/"
+
+TEMPLATE_YAML = {
+    "kind": "operator",
+    "type": "forecast",
+    "version": "v1",
+    "spec": {
+        "historical_data": {
+            "url": f"{DATASET_PREFIX}dataset1.csv",
+        },
+        "output_directory": {
+            "url": "results",
+        },
+        "model": "prophet",
+        "target_column": "Y",
+        "datetime_column": {
+            "name": "Date",
+        },
+        "horizon": 5,
+        "generate_explanations": False,
+    },
+}
+
+
+@pytest.fixture(autouse=True)
+def operator_setup():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        yield tmpdirname
+
+
+class TestForecastApiOptions:
+    def test_custom_filenames(self, operator_setup):
+        """Tests that custom filenames are correctly used."""
+        tmpdirname = operator_setup
+        yaml_i = deepcopy(TEMPLATE_YAML)
+        yaml_i["spec"]["output_directory"]["url"] = tmpdirname
+        yaml_i["spec"]["report_filename"] = "my_report.html"
+        yaml_i["spec"]["metrics_filename"] = "my_metrics.csv"
+        yaml_i["spec"]["test_metrics_filename"] = "my_test_metrics.csv"
+        yaml_i["spec"]["forecast_filename"] = "my_forecast.csv"
+        yaml_i["spec"]["test_data"] = {
+            "url": f"{DATASET_PREFIX}dataset1.csv"
+        }
+
+        operator_config = ForecastOperatorConfig.from_dict(yaml_i)
+        operate(operator_config)
+
+        output_files = os.listdir(tmpdirname)
+        assert "my_report.html" in output_files
+        assert "my_metrics.csv" in output_files
+        assert "my_test_metrics.csv" in output_files
+        assert "my_forecast.csv" in output_files
+
+    def test_report_theme(self, operator_setup):
+        """Tests that the report theme is correctly applied."""
+        tmpdirname = operator_setup
+        yaml_i = deepcopy(TEMPLATE_YAML)
+        yaml_i["spec"]["output_directory"]["url"] = tmpdirname
+        yaml_i["spec"]["report_theme"] = "dark"
+
+        operator_config = ForecastOperatorConfig.from_dict(yaml_i)
+        operate(operator_config)
+
+        with open(os.path.join(tmpdirname, "report.html"), "r") as f:
+            report_content = f.read()
+        assert "dark" in report_content
+
+    def test_disable_report_generation(self, operator_setup):
+        """Tests that report generation can be disabled."""
+        tmpdirname = operator_setup
+        yaml_i = deepcopy(TEMPLATE_YAML)
+        yaml_i["spec"]["output_directory"]["url"] = tmpdirname
+        yaml_i["spec"]["generate_report"] = False
+
+        operator_config = ForecastOperatorConfig.from_dict(yaml_i)
+        operate(operator_config)
+
+        output_files = os.listdir(tmpdirname)
+        assert "report.html" not in output_files
+
+    def test_previous_output_dir(self, operator_setup):
+        """Tests that a previous model can be loaded."""
+        tmpdirname = operator_setup
+
+        # First run: generate a model
+        first_run_dir = os.path.join(tmpdirname, "first_run")
+        os.makedirs(first_run_dir)
+        yaml1 = deepcopy(TEMPLATE_YAML)
+        yaml1["spec"]["output_directory"]["url"] = first_run_dir
+        yaml1["spec"]["generate_model_pickle"] = True
+
+        operator_config1 = ForecastOperatorConfig.from_dict(yaml1)
+        operate(operator_config1)
+
+        # Second run: use the previous model
+        second_run_dir = os.path.join(tmpdirname, "second_run")
+        os.makedirs(second_run_dir)
+        yaml2 = deepcopy(TEMPLATE_YAML)
+        yaml2["spec"]["output_directory"]["url"] = second_run_dir
+        yaml2["spec"]["previous_output_dir"] = first_run_dir
+
+        operator_config2 = ForecastOperatorConfig.from_dict(yaml2)
+        operate(operator_config2)
+
+        # Check that the second run produced a forecast
+        output_files = os.listdir(second_run_dir)
+        assert "forecast.csv" in output_files
+
+    def test_generate_model_artifacts(self, operator_setup):
+        """Tests that model artifacts are correctly generated."""
+        tmpdirname = operator_setup
+        yaml_i = deepcopy(TEMPLATE_YAML)
+        yaml_i["spec"]["output_directory"]["url"] = tmpdirname
+        yaml_i["spec"]["generate_model_parameters"] = True
+        yaml_i["spec"]["generate_model_pickle"] = True
+
+        operator_config = ForecastOperatorConfig.from_dict(yaml_i)
+        operate(operator_config)
+
+        output_files = os.listdir(tmpdirname)
+        assert "model_params.json" in output_files
+        assert "model.pkl" in output_files
+
+    def test_metric(self, operator_setup):
+        """Tests that the metric is correctly used."""
+        tmpdirname = operator_setup
+        yaml_i = deepcopy(TEMPLATE_YAML)
+        yaml_i["spec"]["output_directory"]["url"] = tmpdirname
+        yaml_i["spec"]["metric"] = "RMSE"
+        yaml_i["spec"]["test_data"] = {
+            "url": f"{DATASET_PREFIX}dataset1.csv"
+        }
+
+        operator_config = ForecastOperatorConfig.from_dict(yaml_i)
+        operate(operator_config)
+
+        metrics = pd.read_csv(os.path.join(tmpdirname, "metrics.csv"))
+        assert "RMSE" in metrics["Metric"].values
+
+    def test_outlier_treatment(self, operator_setup):
+        """Tests that outlier treatment is correctly applied."""
+        tmpdirname = operator_setup
+
+        # Create a dataset with outliers
+        data = pd.read_csv(f"{DATASET_PREFIX}dataset1.csv")
+        data.loc[5, "Y"] = 1000
+        data.loc[15, "Y"] = -1000
+        historical_data_path = os.path.join(tmpdirname, "historical_data.csv")
+        data.to_csv(historical_data_path, index=False)
+
+        # Run with outlier treatment
+        yaml_with = deepcopy(TEMPLATE_YAML)
+        yaml_with["spec"]["historical_data"]["url"] = historical_data_path
+        yaml_with["spec"]["output_directory"]["url"] = os.path.join(tmpdirname, "with_treatment")
+        yaml_with["spec"]["preprocessing"] = {"steps": {"outlier_treatment": True}}
+
+        operate(ForecastOperatorConfig.from_dict(yaml_with))
+
+        # Run without outlier treatment
+        yaml_without = deepcopy(TEMPLATE_YAML)
+        yaml_without["spec"]["historical_data"]["url"] = historical_data_path
+        yaml_without["spec"]["output_directory"]["url"] = os.path.join(tmpdirname, "without_treatment")
+        yaml_without["spec"]["preprocessing"] = {"steps": {"outlier_treatment": False}}
+
+        operate(ForecastOperatorConfig.from_dict(yaml_without))
+
+        # Check that outliers are present in the forecast without treatment
+        forecast_without = pd.read_csv(os.path.join(tmpdirname, "without_treatment", "forecast.csv"))
+        assert 1000 in forecast_without["yhat"].values
+        assert -1000 in forecast_without["yhat"].values
+
+        # Check that outliers are not present in the forecast with treatment
+        forecast_with = pd.read_csv(os.path.join(tmpdirname, "with_treatment", "forecast.csv"))
+        assert 1000 not in forecast_with["yhat"].values
+        assert -1000 not in forecast_with["yhat"].values
+
+    def test_missing_value_imputation(self, operator_setup):
+        """Tests that missing value imputation is correctly applied."""
+        tmpdirname = operator_setup
+
+        # Create a dataset with missing values
+        data = pd.read_csv(f"{DATASET_PREFIX}dataset1.csv")
+        data.loc[5, "Y"] = None
+        data.loc[15, "Y"] = None
+        historical_data_path = os.path.join(tmpdirname, "historical_data.csv")
+        data.to_csv(historical_data_path, index=False)
+
+        # Run with missing value imputation
+        yaml_i = deepcopy(TEMPLATE_YAML)
+        yaml_i["spec"]["historical_data"]["url"] = historical_data_path
+        yaml_i["spec"]["output_directory"]["url"] = tmpdirname
+        yaml_i["spec"]["preprocessing"] = {"steps": {"missing_value_imputation": True}}
+
+        results = operate(ForecastOperatorConfig.from_dict(yaml_i))
+        forecast = results.get_forecast()
+
+        # Check that there are no missing values in the forecast
+        assert not forecast["yhat"].isnull().any()
+
+    def test_confidence_interval_width(self, operator_setup):
+        """Tests that the confidence interval width is correctly applied."""
+        tmpdirname = operator_setup
+        yaml_i = deepcopy(TEMPLATE_YAML)
+        yaml_i["spec"]["output_directory"]["url"] = tmpdirname
+        yaml_i["spec"]["confidence_interval_width"] = 0.95
+
+        operator_config = ForecastOperatorConfig.from_dict(yaml_i)
+        results = operate(operator_config)
+        forecast = results.get_forecast()
+
+        # Check that the confidence interval bounds are present.
+        # This is a basic check; a more robust check would involve statistical tests.
+        assert "yhat_upper" in forecast.columns
+        assert "yhat_lower" in forecast.columns
+
+    def test_tuning(self, operator_setup):
+        """Tests that tuning is correctly applied."""
+        tmpdirname = operator_setup
+        yaml_i = deepcopy(TEMPLATE_YAML)
+        yaml_i["spec"]["output_directory"]["url"] = tmpdirname
+        yaml_i["spec"]["tuning"] = {"n_trials": 5}
+        yaml_i["spec"]["generate_model_parameters"] = True
+
+        operator_config = ForecastOperatorConfig.from_dict(yaml_i)
+        operate(operator_config)
+
+        output_files = os.listdir(tmpdirname)
+        assert "model_params.json" in output_files