-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecision_trees_experiment.py
60 lines (48 loc) · 1.92 KB
/
decision_trees_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from experiment import run_experiment, create_run_name
from utilities import read_experiment_parameters
from test_cases import filter_based_on_test_case
from skopt.space import Categorical, Integer
import json
def _build_search_space(mode):
    """Return the DecisionTreeRegressor hyper-parameter search space for *mode*.

    When *mode* is ``"bayesian"`` the values are skopt dimension objects
    (``Categorical`` / ``Integer``); for any other mode they are plain lists
    of candidate values.
    """
    criteria = ["squared_error", "friedman_mse", "absolute_error", "poisson"]
    splitters = ["best", "random"]

    if mode == "bayesian":
        return {
            "criterion": Categorical(criteria),
            "splitter": Categorical(splitters),
            "max_depth": Integer(2, 20),
            "min_samples_leaf": Integer(5, 80),
        }

    return {
        "criterion": criteria,
        "splitter": splitters,
        "max_depth": [2, 3, 5, 7, 10, 15, 20],
        "min_samples_leaf": [5, 8, 10, 15, 40, 80],
    }


def main():
    """Run the decision-tree regression experiment on the properties dataset.

    Loads the parquet dataset, filters it according to the noise/area values
    returned by ``read_experiment_parameters``, splits off the ``"Price"``
    column as the target, and passes the model, search space and experiment
    metadata to ``run_experiment``.
    """
    # Single source of truth for the dataset location, so the file that is
    # loaded and the artifact recorded in the experiment details cannot drift.
    data_path = "./data/properties_noise_100.parquet.gzip"
    df = pd.read_parquet(data_path)

    # Reading experiment parameters.
    (
        experiment_id,
        experiment_name,
        experiment_tags,
        noise,
        mode,
        area,
    ) = read_experiment_parameters()
    print(f"Running with noise={noise} and area={area}")

    df = filter_based_on_test_case(df, noise, area)
    # Report the row count itself; df.shape would print a (rows, cols) tuple.
    print(f"Total number of rows: {len(df)}")

    # Target is the price; every remaining column is used as a feature.
    y = df["Price"]
    X = df.drop("Price", axis=1)

    run_name = create_run_name("decision_trees", noise, area)
    metadata = {
        "noise": noise,
        "mode": mode,
        "area": area,
    }
    experiment_details = {
        "id": experiment_id,
        "name": experiment_name,
        "artifact": data_path,
        # Tags arrive as a JSON string (or None when no tags were supplied).
        "tags": json.loads(experiment_tags) if experiment_tags is not None else None,
    }

    # Initializing model.
    model = DecisionTreeRegressor()

    # Setting up parameters.
    parameters = _build_search_space(mode)

    # Run the experiment.
    # NOTE(review): 100 and 0.2 are passed positionally; presumably the number
    # of search iterations and the held-out test fraction - confirm against
    # run_experiment's signature.
    run_experiment(model, parameters, X, y, 100, 0.2, "decision-trees",
                   run_name, experiment_details, metadata, mode)


if __name__ == "__main__":
    main()