feat: Add unit tests for forecast operator API options #1237

Open · wants to merge 1 commit into base: main
2 changes: 1 addition & 1 deletion ads/common/model.py
@@ -4,7 +4,7 @@
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

from distutils import dir_util
import shutil
import os
import shutil
from collections.abc import Iterable
77 changes: 76 additions & 1 deletion ads/opctl/operator/lowcode/forecast/operator_config.py
@@ -93,7 +93,82 @@ class Tuning(DataClassSerializable):

@dataclass(repr=True)
class ForecastOperatorSpec(DataClassSerializable):
"""Class representing forecast operator specification."""
"""
Class representing forecast operator specification.

Attributes
----------
name: str
The name of the forecast operator.
historical_data: InputData
The historical data to be used for forecasting.
additional_data: InputData
Additional data to be used for forecasting.
test_data: TestData
The test data to be used for evaluating the forecast.
output_directory: OutputDirectory
The directory where the output files will be saved.
report_filename: str
The name of the report file. Defaults to "report.html".
report_title: str
The title of the report.
report_theme: str
The theme of the report. Can be "light" or "dark". Defaults to "light".
metrics_filename: str
The name of the metrics file. Defaults to "metrics.csv".
test_metrics_filename: str
The name of the test metrics file. Defaults to "test_metrics.csv".
forecast_filename: str
The name of the forecast file. Defaults to "forecast.csv".
global_explanation_filename: str
The name of the global explanation file. Defaults to "global_explanation.csv".
local_explanation_filename: str
The name of the local explanation file. Defaults to "local_explanation.csv".
target_column: str
The name of the target column.
preprocessing: DataPreprocessor
The data preprocessing settings.
datetime_column: DateTimeColumn
The datetime column details.
target_category_columns: List[str]
The list of target category columns.
generate_report: bool
Whether to generate a report. Defaults to True.
generate_forecast_file: bool
Whether to generate a forecast file. Defaults to True.
generate_metrics: bool
Whether to generate metrics. Defaults to True.
generate_metrics_file: bool
Whether to generate a metrics file. Defaults to True.
generate_explanations: bool
Whether to generate explanations. Defaults to False.
generate_explanation_files: bool
Whether to generate explanation files. Defaults to True.
explanations_accuracy_mode: str
The accuracy mode for explanations. Can be "HIGH_ACCURACY", "BALANCED", "FAST_APPROXIMATE", or "AUTOMLX".
horizon: int
The forecast horizon.
model: str
The forecasting model to be used.
model_kwargs: Dict
The keyword arguments for the model.
model_parameters: str
The model parameters.
previous_output_dir: str
The directory of a previous run to be used for forecasting.
generate_model_parameters: bool
Whether to generate model parameters. Defaults to False.
generate_model_pickle: bool
Whether to generate a model pickle. Defaults to False.
confidence_interval_width: float
The width of the confidence interval. Defaults to 0.80.
metric: str
The metric to be used for evaluation.
tuning: Tuning
The tuning settings.
what_if_analysis: WhatIfAnalysis
The what-if analysis settings.
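
Examples
--------
An illustrative spec dictionary mirroring the template used in the unit
tests (the data path and values are placeholders), suitable as the
``spec`` section of the config dictionary loaded via
``ForecastOperatorConfig.from_dict``:

    spec = {
        "historical_data": {"url": "data.csv"},
        "output_directory": {"url": "results"},
        "model": "prophet",
        "target_column": "Y",
        "datetime_column": {"name": "Date"},
        "horizon": 5,
        "generate_explanations": False,
    }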
"""

name: str = None
historical_data: InputData = field(default_factory=InputData)
239 changes: 239 additions & 0 deletions tests/operators/forecast/test_api_options.py
@@ -0,0 +1,239 @@
#!/usr/bin/env python

# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import os
import tempfile
import pandas as pd
import pytest
from copy import deepcopy
from ads.opctl.operator.lowcode.forecast.__main__ import operate
from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorConfig

DATASET_PREFIX = f"{os.path.dirname(os.path.abspath(__file__))}/../data/timeseries/"

TEMPLATE_YAML = {
"kind": "operator",
"type": "forecast",
"version": "v1",
"spec": {
"historical_data": {
"url": f"{DATASET_PREFIX}dataset1.csv",
},
"output_directory": {
"url": "results",
},
"model": "prophet",
"target_column": "Y",
"datetime_column": {
"name": "Date",
},
"horizon": 5,
"generate_explanations": False,
},
}

@pytest.fixture(autouse=True)
def operator_setup():
with tempfile.TemporaryDirectory() as tmpdirname:
yield tmpdirname

class TestForecastApiOptions:
def test_custom_filenames(self, operator_setup):
"""Tests that custom filenames are correctly used."""
tmpdirname = operator_setup
yaml_i = deepcopy(TEMPLATE_YAML)
yaml_i["spec"]["output_directory"]["url"] = tmpdirname
yaml_i["spec"]["report_filename"] = "my_report.html"
yaml_i["spec"]["metrics_filename"] = "my_metrics.csv"
yaml_i["spec"]["test_metrics_filename"] = "my_test_metrics.csv"
yaml_i["spec"]["forecast_filename"] = "my_forecast.csv"
yaml_i["spec"]["test_data"] = {
"url": f"{DATASET_PREFIX}dataset1.csv"
}

operator_config = ForecastOperatorConfig.from_dict(yaml_i)
operate(operator_config)

output_files = os.listdir(tmpdirname)
assert "my_report.html" in output_files
assert "my_metrics.csv" in output_files
assert "my_test_metrics.csv" in output_files
assert "my_forecast.csv" in output_files

def test_report_theme(self, operator_setup):
"""Tests that the report theme is correctly applied."""
tmpdirname = operator_setup
yaml_i = deepcopy(TEMPLATE_YAML)
yaml_i["spec"]["output_directory"]["url"] = tmpdirname
yaml_i["spec"]["report_theme"] = "dark"

operator_config = ForecastOperatorConfig.from_dict(yaml_i)
operate(operator_config)

with open(os.path.join(tmpdirname, "report.html"), "r") as f:
report_content = f.read()
assert "dark" in report_content

def test_disable_report_generation(self, operator_setup):
"""Tests that report generation can be disabled."""
tmpdirname = operator_setup
yaml_i = deepcopy(TEMPLATE_YAML)
yaml_i["spec"]["output_directory"]["url"] = tmpdirname
yaml_i["spec"]["generate_report"] = False

operator_config = ForecastOperatorConfig.from_dict(yaml_i)
operate(operator_config)

output_files = os.listdir(tmpdirname)
assert "report.html" not in output_files

def test_previous_output_dir(self, operator_setup):
"""Tests that a previous model can be loaded."""
tmpdirname = operator_setup

# First run: generate a model
first_run_dir = os.path.join(tmpdirname, "first_run")
os.makedirs(first_run_dir)
yaml1 = deepcopy(TEMPLATE_YAML)
yaml1["spec"]["output_directory"]["url"] = first_run_dir
yaml1["spec"]["generate_model_pickle"] = True

operator_config1 = ForecastOperatorConfig.from_dict(yaml1)
operate(operator_config1)

# Second run: use the previous model
second_run_dir = os.path.join(tmpdirname, "second_run")
os.makedirs(second_run_dir)
yaml2 = deepcopy(TEMPLATE_YAML)
yaml2["spec"]["output_directory"]["url"] = second_run_dir
yaml2["spec"]["previous_output_dir"] = first_run_dir

operator_config2 = ForecastOperatorConfig.from_dict(yaml2)
operate(operator_config2)

# Check that the second run produced a forecast
output_files = os.listdir(second_run_dir)
assert "forecast.csv" in output_files

def test_generate_model_artifacts(self, operator_setup):
"""Tests that model artifacts are correctly generated."""
tmpdirname = operator_setup
yaml_i = deepcopy(TEMPLATE_YAML)
yaml_i["spec"]["output_directory"]["url"] = tmpdirname
yaml_i["spec"]["generate_model_parameters"] = True
yaml_i["spec"]["generate_model_pickle"] = True

operator_config = ForecastOperatorConfig.from_dict(yaml_i)
operate(operator_config)

output_files = os.listdir(tmpdirname)
assert "model_params.json" in output_files

def test_metric(self, operator_setup):
"""Tests that the metric is correctly used."""
tmpdirname = operator_setup
yaml_i = deepcopy(TEMPLATE_YAML)
yaml_i["spec"]["output_directory"]["url"] = tmpdirname
yaml_i["spec"]["metric"] = "RMSE"
yaml_i["spec"]["test_data"] = {
"url": f"{DATASET_PREFIX}dataset1.csv"
}

operator_config = ForecastOperatorConfig.from_dict(yaml_i)
operate(operator_config)

metrics = pd.read_csv(os.path.join(tmpdirname, "metrics.csv"))
assert "RMSE" in metrics["Metric"].values

def test_outlier_treatment(self, operator_setup):
"""Tests that outlier treatment is correctly applied."""
tmpdirname = operator_setup

# Create a dataset with outliers
data = pd.read_csv(f"{DATASET_PREFIX}dataset1.csv")
data.loc[5, "Y"] = 1000
data.loc[15, "Y"] = -1000
historical_data_path = os.path.join(tmpdirname, "historical_data.csv")
data.to_csv(historical_data_path, index=False)

# Run with outlier treatment
yaml_with = deepcopy(TEMPLATE_YAML)
yaml_with["spec"]["historical_data"]["url"] = historical_data_path
yaml_with["spec"]["output_directory"]["url"] = os.path.join(tmpdirname, "with_treatment")
yaml_with["spec"]["preprocessing"] = {"steps": {"outlier_treatment": True}}

operate(ForecastOperatorConfig.from_dict(yaml_with))

# Run without outlier treatment
yaml_without = deepcopy(TEMPLATE_YAML)
yaml_without["spec"]["historical_data"]["url"] = historical_data_path
yaml_without["spec"]["output_directory"]["url"] = os.path.join(tmpdirname, "without_treatment")
yaml_without["spec"]["preprocessing"] = {"steps": {"outlier_treatment": False}}

operate(ForecastOperatorConfig.from_dict(yaml_without))

# Check that outliers are present in the forecast without treatment
forecast_without = pd.read_csv(os.path.join(tmpdirname, "without_treatment", "forecast.csv"))
assert 1000 in forecast_without["yhat"].values
assert -1000 in forecast_without["yhat"].values

# Check that outliers are not present in the forecast with treatment
forecast_with = pd.read_csv(os.path.join(tmpdirname, "with_treatment", "forecast.csv"))
assert 1000 not in forecast_with["yhat"].values
assert -1000 not in forecast_with["yhat"].values

def test_missing_value_imputation(self, operator_setup):
"""Tests that missing value imputation is correctly applied."""
tmpdirname = operator_setup

# Create a dataset with missing values
data = pd.read_csv(f"{DATASET_PREFIX}dataset1.csv")
data.loc[5, "Y"] = None
data.loc[15, "Y"] = None
historical_data_path = os.path.join(tmpdirname, "historical_data.csv")
data.to_csv(historical_data_path, index=False)

# Run with missing value imputation
yaml_i = deepcopy(TEMPLATE_YAML)
yaml_i["spec"]["historical_data"]["url"] = historical_data_path
yaml_i["spec"]["output_directory"]["url"] = tmpdirname
yaml_i["spec"]["preprocessing"] = {"steps": {"missing_value_imputation": True}}

results = operate(ForecastOperatorConfig.from_dict(yaml_i))
forecast = results.get_forecast()

# Check that there are no missing values in the forecast
assert not forecast["yhat"].isnull().any()
assert "model.pkl" in output_files

def test_confidence_interval_width(self, operator_setup):
"""Tests that the confidence interval width is correctly applied."""
tmpdirname = operator_setup
yaml_i = deepcopy(TEMPLATE_YAML)
yaml_i["spec"]["output_directory"]["url"] = tmpdirname
yaml_i["spec"]["confidence_interval_width"] = 0.95

operator_config = ForecastOperatorConfig.from_dict(yaml_i)
results = operate(operator_config)
forecast = results.get_forecast()

# Basic check that the prediction interval columns are generated;
# a more robust check would verify the empirical coverage statistically
assert "yhat_upper" in forecast.columns
assert "yhat_lower" in forecast.columns

def test_tuning(self, operator_setup):
"""Tests that tuning is correctly applied."""
tmpdirname = operator_setup
yaml_i = deepcopy(TEMPLATE_YAML)
yaml_i["spec"]["output_directory"]["url"] = tmpdirname
yaml_i["spec"]["tuning"] = {"n_trials": 5}
yaml_i["spec"]["generate_model_parameters"] = True

operator_config = ForecastOperatorConfig.from_dict(yaml_i)
operate(operator_config)

output_files = os.listdir(tmpdirname)
assert "model_params.json" in output_files