run.py
"""An example run file which trains a dummy AutoML system on the training split of a dataset
and logs the accuracy score on the test set.
In the example data you are given access to the labels of the test split, however
in the test dataset we will provide later, you will not have access
to this and you will need to output your predictions for the images of the test set
to a file, which we will grade using github classrooms!
"""
from __future__ import annotations

import argparse
import logging
from pathlib import Path

import numpy as np
from sklearn.metrics import accuracy_score

from automl.automl import AutoML
from automl.datasets import FashionDataset, FlowersDataset, EmotionsDataset

logger = logging.getLogger(__name__)

def main(
    dataset: str,
    output_path: Path,
    seed: int,
):
    match dataset:
        case "fashion":
            dataset_class = FashionDataset
        case "flowers":
            dataset_class = FlowersDataset
        case "emotions":
            dataset_class = EmotionsDataset
        case _:
            raise ValueError(f"Invalid dataset: {dataset}")
logger.info("Fitting AutoML")
# You do not need to follow this setup or API it's merely here to provide
# an example of how your automl system could be used.
# As a general rule of thumb, you should **never** pass in any
# test data to your AutoML solution other than to generate predictions.
automl = AutoML(seed=seed)
# load the dataset and create a loader then pass it
automl.fit(dataset_class)
# Do the same for the test dataset
test_preds, test_labels = automl.predict(dataset_class)
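
    # A minimal sketch of the interface this script assumes from automl/automl.py
    # (the real class may differ; this is only for illustration):
    #
    #   class AutoML:
    #       def __init__(self, seed: int) -> None: ...
    #       def fit(self, dataset_class) -> None: ...
    #       def predict(self, dataset_class) -> tuple[np.ndarray, np.ndarray]: ...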

    # Write the predictions for X_test to disk.
    # This will be used by GitHub Classrooms to get a performance
    # score on the test set.
    logger.info("Writing predictions to disk")
    with output_path.open("wb") as f:
        np.save(f, test_preds)

    # Check whether test_labels are available (they contain NaNs when they are not).
    if not np.isnan(test_labels).any():
        acc = accuracy_score(test_labels, test_preds)
        logger.info(f"Accuracy on test set: {acc}")
    else:
        # This is the setting for the exam dataset; you will not have access to the labels.
        logger.info(f"No test split for dataset '{dataset}'")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="The name of the dataset to run on.",
        choices=["fashion", "flowers", "emotions"],
    )
    parser.add_argument(
        "--output-path",
        type=Path,
        default=Path("predictions.npy"),
        help=(
            "The path to save the predictions to."
            " By default this will just save to './predictions.npy'."
        ),
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help=(
            "Random seed for reproducibility if you are using any randomness,"
            " e.g. torch, numpy, pandas, sklearn, etc."
        ),
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Whether to log only warnings and errors.",
    )

    args = parser.parse_args()

    if not args.quiet:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logger.info(
        f"Running dataset {args.dataset}"
        f"\n{args}"
    )

    main(
        dataset=args.dataset,
        output_path=args.output_path,
        seed=args.seed,
    )
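
# Example invocation (assuming the `automl` package from this repository is importable):
#   python run.py --dataset fashion --output-path predictions.npy --seed 42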