CLI format run benchopt #2

Open · wants to merge 2 commits into main
27 changes: 27 additions & 0 deletions benchopt_benchmark/.github/workflows/main.yml
@@ -0,0 +1,27 @@
name: Tests

on:
  push:
    branches:
      - main
  create:
    tags:
      - '**'
  pull_request:
    branches:
      - main
  schedule:
    # Run every 1st of the month at 7:42am UTC.
    - cron: '42 7 1 * *'

jobs:
  benchopt_dev:
    uses: benchopt/template_benchmark/.github/workflows/test_benchmarks.yml@main
    with:
      benchopt_branch: benchopt@main
  benchopt_release:
    uses: benchopt/template_benchmark/.github/workflows/test_benchmarks.yml@main
    with:
      benchopt_version: latest
  lint:
    uses: benchopt/template_benchmark/.github/workflows/lint_benchmarks.yml@main
18 changes: 18 additions & 0 deletions benchopt_benchmark/.gitignore
@@ -0,0 +1,18 @@
# Cache directories
.pytest_cache
__pycache__
__cache__
*.egg-info
.coverage
**/outputs
joblib/
/data/

# IDE specific folders
.vscode

# Config files
benchopt.ini

.DS_Store
coverage.xml
33 changes: 33 additions & 0 deletions benchopt_benchmark/README.rst
@@ -0,0 +1,33 @@

Fast Optimizer Benchmark
========================
|Build Status| |Python 3.9+|

Benchopt is a package to simplify and make more transparent and
reproducible the comparison of optimization algorithms.
This benchmark is dedicated to optimization algorithms for training neural networks.

Install
--------

This benchmark can be run using the following commands:

.. code-block::

   $ pip install -U benchopt
   $ git clone https://github.com/automl/FOB
   $ benchopt run FOB

Apart from the problem, options can be passed to ``benchopt run`` to restrict the benchmark to some solvers or datasets, e.g.:

.. code-block::

   $ benchopt run FOB -s solver1 -d dataset2 --max-runs 10 --n-repetitions 10


Use ``benchopt run -h`` for more details about these options, or visit https://benchopt.github.io/api.html.

.. |Build Status| image:: https://github.com/automl/FOB/workflows/Tests/badge.svg
   :target: https://github.com/automl/FOB/actions
.. |Python 3.9+| image:: https://img.shields.io/badge/python-3.9%2B-blue
   :target: https://www.python.org/downloads/release/python-390/
18 changes: 18 additions & 0 deletions benchopt_benchmark/benchmark_utils/__init__.py
@@ -0,0 +1,18 @@
# `benchmark_utils` is a module in which you can define code to reuse in
# the benchmark objective, datasets, and solvers. The folder should have the
# name `benchmark_utils`, and code defined inside will be importable using
# the usual import syntax. To import external packages in this file, use a
# `safe_import_context` named "import_ctx", as follows:

from benchopt.utils import safe_import_context

with safe_import_context() as import_ctx:
    import numpy as np


def gradient_ols(X, y, beta):
    # Gradient of the unscaled least-squares loss 0.5 * ||y - X @ beta||^2
    # (note: `value_ols` below averages over samples instead).
    return X.T @ (X @ beta - y)


def value_ols(X, y, beta):
    return 0.5 * np.mean((y - X @ beta) ** 2)
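These helpers are importable from any solver or objective in the benchmark. As a minimal sketch of that reuse (the full-batch gradient-descent `Solver` below is hypothetical, not part of this PR):

    import numpy as np

    from benchopt import BaseSolver
    from benchmark_utils import gradient_ols


    class Solver(BaseSolver):
        name = 'GD'  # hypothetical full-batch gradient descent

        def set_objective(self, X, y):
            self.X, self.y = X, y

        def run(self, n_iter):
            X, y = self.X, self.y
            # Step size 1 / ||X||_2^2 guarantees descent on 0.5 * ||y - X @ beta||^2.
            step = 1.0 / np.linalg.norm(X, ord=2) ** 2
            beta = np.zeros(X.shape[1])
            for _ in range(n_iter):
                beta -= step * gradient_ols(X, y, beta)
            self.beta = beta

        def get_result(self):
            return dict(beta=self.beta)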
29 changes: 29 additions & 0 deletions benchopt_benchmark/datasets/mnist.py
@@ -0,0 +1,29 @@
from benchopt import BaseDataset, safe_import_context
from benchopt.config import get_data_path

with safe_import_context() as import_ctx:
    from pytorch_fob.tasks.mnist.data import MNISTDataModule
    from pytorch_fob.tasks.mnist.model import MNISTModel


class Dataset(BaseDataset):
    name = 'MNIST'

    parameters = {
        'num_hidden': [10],
        'activation': ['Sigmoid', 'ReLU'],
        'seed': [42, 47]
    }

    def get_data(self):
        model = MNISTModel(
            num_hidden=self.num_hidden,
            activation=self.activation
        )

        data_dir = get_data_path('mnist')
        data_module = MNISTDataModule(
            data_dir=data_dir, seed=self.seed
        )

        return dict(model=model, data_module=data_module)
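Each combination of the parameters above becomes a separate dataset variant. A single variant can be selected from the CLI with benchopt's bracket syntax, e.g. (a sketch, using the values declared in `parameters`):

    $ benchopt run . -d "MNIST[activation=ReLU,seed=42]"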
43 changes: 43 additions & 0 deletions benchopt_benchmark/datasets/simulated.py
@@ -0,0 +1,43 @@
from benchopt import BaseDataset, safe_import_context


# Protect the import with `safe_import_context()`. This allows:
# - skipping import to speed up autocompletion in CLI.
# - getting requirements info when not all dependencies are installed.
with safe_import_context() as import_ctx:
    import numpy as np


# All datasets must be named `Dataset` and inherit from `BaseDataset`
class Dataset(BaseDataset):

    # Name to select the dataset in the CLI and to display the results.
    name = "Simulated"

    # List of parameters to generate the datasets. The benchmark will consider
    # the cross product for each key in the dictionary.
    # Any parameter 'param' defined here is available as `self.param`.
    parameters = {
        'n_samples, n_features': [
            (1000, 500),
            (5000, 200),
        ],
        'random_state': [27],
    }

    # List of packages needed to run the dataset. See the corresponding
    # section in objective.py
    requirements = []

    def get_data(self):
        # The return arguments of this function are passed as keyword
        # arguments to `Objective.set_data`. This defines the benchmark's
        # API to pass data. It is customizable for each benchmark.

        # Generate pseudorandom data using `numpy`.
        rng = np.random.RandomState(self.random_state)
        X = rng.randn(self.n_samples, self.n_features)
        y = rng.randn(self.n_samples)

        # The dictionary defines the keyword arguments for `Objective.set_data`
        return dict(X=X, y=y)
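For reference, benchopt unpacks the dict returned by `get_data` as keyword arguments to `Objective.set_data`, roughly:

    data = dataset.get_data()   # -> dict(X=..., y=...)
    objective.set_data(**data)  # keys must match `set_data`'s signature

Note that the keys here (`X`, `y`) fit an OLS-style objective such as the helpers in `benchmark_utils`, whereas the FOB `Objective` below expects `model` and `data_module`, as provided by the MNIST dataset.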
41 changes: 41 additions & 0 deletions benchopt_benchmark/objective.py
@@ -0,0 +1,41 @@
from benchopt import BaseObjective, safe_import_context


with safe_import_context() as import_ctx:
    from lightning import Trainer


class Objective(BaseObjective):
    name = "FOB"

    requirements = [
        "pip::git+https://github.com/automl/FOB.git"
    ]

    def set_data(self, model, data_module):
        self.model = model
        self.data_module = data_module

    def evaluate_result(self, trainer: Trainer):
        # NOTE: train metrics are currently computed with `validate`, i.e.
        # on the validation split, so `score_train` mirrors `score_val`.
        score_train = trainer.validate(self.model, datamodule=self.data_module)
        score_val = trainer.validate(self.model, datamodule=self.data_module)
        # TODO - Need to load the best checkpoint
        score_test = trainer.test(self.model, datamodule=self.data_module)
        return dict(
            **{f'train_{k}': v for k, v in score_train[0].items()},
            **{f'val_{k}': v for k, v in score_val[0].items()},
            **{f'test_last_{k}': v for k, v in score_test[0].items()},
            value=score_val[0]['val_loss'],
        )

    def get_objective(self):
        return dict(
            model=self.model,
            data_module=self.data_module
        )

    def get_one_result(self):
        # One valid input for `evaluate_result`, used to test the benchmark
        # before any solver runs. Devices are left to Lightning's default,
        # since no device configuration is defined on this objective.
        return dict(trainer=Trainer(
            enable_progress_bar=True,
        ))
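For context, a simplified sketch of how benchopt chains these hooks for one (dataset, solver) pair (not actual benchopt source):

    objective.set_data(**dataset.get_data())           # model, data_module
    solver.set_objective(**objective.get_objective())
    solver.run(stop_val)                               # trains the model
    result = solver.get_result()                       # -> dict(trainer=...)
    metrics = objective.evaluate_result(**result)      # train/val/test scores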
77 changes: 77 additions & 0 deletions benchopt_benchmark/solvers/sgd.py
@@ -0,0 +1,77 @@
from benchopt import BaseSolver, safe_import_context


with safe_import_context() as import_ctx:
    from pytorch_fob.optimizers.sgd_baseline.optimizer import \
        configure_optimizers
    from pytorch_fob.optimizers.optimizers import OptimizerConfig
    from pytorch_fob.engine.parameter_groups import GroupedModel
    from lightning.pytorch.utilities.types import OptimizerLRScheduler
    from lightning import Trainer, Callback


class Optimizer:
    def __init__(self, config: OptimizerConfig) -> None:
        self.config = config

    def configure_optimizers(self, model: GroupedModel) -> OptimizerLRScheduler:
        return configure_optimizers(model, self.config)


class Solver(BaseSolver):
    name = 'SGD'

    parameters = {
        'learning_rate': [1e-3],
        'weight_decay': [1e-4],
        'momentum': [0.9],
        'nesterov': [True],
        'max_epochs': [200],
        'eta_min_factor': [0.1],
        'lr_interval': ['step'],
        'batch_size': [64]
    }
    sampling_strategy = 'run_once'

    def set_objective(self, model, data_module):
        self.model = model
        self.data_module = data_module

        self.data_module.set_batch_size(self.batch_size)

        config = OptimizerConfig(
            optimizer_key='sgd',
            task_key='benchopt',
            config=dict(
                sgd=dict(
                    name=self.name,
                    lr_interval=self.lr_interval,
                    learning_rate=self.learning_rate,
                    weight_decay=self.weight_decay,
                    momentum=self.momentum,
                    nesterov=self.nesterov,
                    eta_min_factor=self.eta_min_factor,
                ),
                benchopt=dict(
                    max_epochs=self.max_epochs,
                    max_steps=200,
                ),
            )
        )

        optimizer = Optimizer(config)
        self.model.set_optimizer(optimizer)

    def run(self, _):
        # class BenchoptCallback(Callback):
        #     def on_train_epoch_end(self, trainer, pl_module):
        #         trainer.should_stop = not cb()

        self.trainer = Trainer(
            max_epochs=self.max_epochs,
            # callbacks=[BenchoptCallback()]
        )
        self.trainer.fit(self.model, self.data_module)

    def get_result(self):
        return dict(trainer=self.trainer)
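The commented-out callback sketches how this solver could hand control of the stopping criterion back to benchopt. A sketch of that variant, assuming `sampling_strategy` is switched to `'callback'` (in that mode benchopt passes a `cb` callable to `run`):

    from lightning import Callback, Trainer


    class BenchoptCallback(Callback):
        def __init__(self, cb):
            self.cb = cb  # benchopt decides after each epoch whether to stop

        def on_train_epoch_end(self, trainer, pl_module):
            trainer.should_stop = not self.cb()


    # Inside Solver, with `sampling_strategy = 'callback'`:
    def run(self, cb):
        self.trainer = Trainer(
            max_epochs=self.max_epochs,
            callbacks=[BenchoptCallback(cb)],
        )
        self.trainer.fit(self.model, self.data_module)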
13 changes: 13 additions & 0 deletions benchopt_benchmark/test_config.py
@@ -0,0 +1,13 @@
import sys  # noqa: F401

import pytest  # noqa: F401


def check_test_solver_install(solver_class):
    """Hook called in `test_solver_install`.

    If one solver needs to be skipped/xfailed on some
    particular architecture, call pytest.xfail when
    detecting the situation.
    """
    pass
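For example, a hypothetical guard that xfails the SGD solver on macOS could look like:

    def check_test_solver_install(solver_class):
        if solver_class.name.lower() == 'sgd' and sys.platform == 'darwin':
            pytest.xfail('SGD solver install is not tested on macOS')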
15 changes: 11 additions & 4 deletions pytorch_fob/tasks/mnist/data.py
@@ -1,16 +1,18 @@
+import torch
 from torch.utils.data import random_split
 from torchvision.datasets import MNIST
 from torchvision import transforms
 from pytorch_fob.engine.configs import TaskConfig

 from pytorch_fob.tasks import TaskDataModule


 class MNISTDataModule(TaskDataModule):
-    def __init__(self, config: TaskConfig):
-        super().__init__(config)
+    def __init__(self, data_dir, seed=None):
+        super().__init__(data_dir)
         # split can also be a fraction self.train_val_split
         # [55000, 5000] is taken from https://lightning.ai/docs/pytorch/stable/data/datamodule.html
         self.train_val_split = [55000, 5000]
+        self.seed = seed

     # TODO: check values
     # https://lightning.ai/docs/pytorch/stable/data/datamodule.html
@@ -29,9 +31,14 @@ def setup(self, stage: str):
         """
         # Assign train/val datasets for use in dataloaders
         if stage == "fit":
+            generator = torch.Generator()
+            if self.seed is not None:
+                generator = generator.manual_seed(self.seed)
             mnist_full = MNIST(str(self.data_dir), train=True, transform=self.transform)
-            # TODO (Zachi) confirm seed everything makes this reproducable:
-            self.data_train, self.data_val = random_split(mnist_full, self.train_val_split)
+            self.data_train, self.data_val = random_split(
+                mnist_full, self.train_val_split, generator=generator
+            )

         # Assign test dataset for use in dataloader(s)
         if stage == "test":
12 changes: 6 additions & 6 deletions pytorch_fob/tasks/mnist/model.py
@@ -5,15 +5,15 @@


 class MNISTModel(TaskModel):
-    def __init__(self, optimizer: Optimizer, config: TaskConfig):
+    def __init__(self, num_hidden: int, activation: str):

         input_size = 28 * 28  # 784
         num_classes = 10
-        num_hidden = config.model.num_hidden
-        activation = config.model.activation
-        if activation.lower() == "Sigmoid".lower():
+        num_hidden = num_hidden
+        activation = activation
+        if activation.lower() == "sigmoid":
             self.activation = torch.nn.Sigmoid
-        elif activation.lower() == "ReLU".lower():
+        elif activation.lower() == "relu":
             self.activation = torch.nn.ReLU
         else:
             raise NotImplementedError(f"{activation} is not supported for mnist yet")
@@ -25,7 +25,7 @@ def __init__(self, optimizer: Optimizer, config: TaskConfig):
             self.activation(),
             torch.nn.Linear(num_hidden, num_classes, bias=True),
         )
-        super().__init__(model, optimizer, config)
+        super().__init__(model)
         # negative log likelihood loss
         self.loss_fn = torch.nn.functional.nll_loss
