lgbm_experiment.py
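"""Hyperparameter-tuning experiment for a LightGBM regressor on the property
price dataset.

The script reads the experiment parameters, filters the data for the requested
test case (noise level and area), builds either a Bayesian (scikit-optimize)
search space or a plain grid, and hands everything to run_experiment.
"""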
import json

import pandas as pd
import lightgbm as lgb
from skopt.space import Real, Integer

from experiment import run_experiment, create_run_name
from utilities import read_experiment_parameters
from test_cases import filter_based_on_test_case

if __name__ == "__main__":
    df = pd.read_parquet("./data/properties_noise_100.parquet.gzip")

    # Read experiment parameters.
    experiment_id, experiment_name, experiment_tags, noise, mode, area = read_experiment_parameters()
    print(f"Running with noise={noise} and area={area}")

    # Keep only the rows matching the requested test case.
    df = filter_based_on_test_case(df, noise, area)
    print(f"Total number of rows: {df.shape[0]}")

    # Split target and features.
    y = df["Price"]
    X = df.drop("Price", axis=1)

    run_name = create_run_name("lgbm", noise, area)
    metadata = {
        "noise": noise,
        "mode": mode,
        "area": area
    }
    experiment_details = {
        "id": experiment_id,
        "name": experiment_name,
        "artifact": "./data/properties_noise_100.parquet.gzip",
        "tags": json.loads(experiment_tags) if experiment_tags is not None else None
    }
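
    # metadata and experiment_details are passed through to run_experiment,
    # presumably so the run can be logged against the given experiment
    # (id, name, tags) together with the path of the source artifact.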

    # Initialize the model with default settings; hyperparameters are tuned below.
    model = lgb.LGBMRegressor()

    # Set up the hyperparameter search space.
    if mode == "bayesian":
        # scikit-optimize search dimensions for Bayesian optimization.
        parameters = {
            "max_depth": Integer(3, 10),
            "num_leaves": Integer(10, 120),
            "learning_rate": Real(0.01, 0.3, prior="log-uniform"),
            "n_estimators": Integer(50, 1000),
            "colsample_bytree": Real(0.3, 1, prior="log-uniform")
        }
    else:
        # Discrete grid covering roughly the same ranges.
        parameters = {
            "max_depth": [3, 4, 5, 6, 10],
            "num_leaves": [10, 20, 30, 40, 100, 120],
            "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3],
            "n_estimators": [50, 100, 300, 500, 700, 900, 1000],
            "colsample_bytree": [0.3, 0.5, 0.7, 1]
        }

    # Run the experiment.
    run_experiment(model, parameters, X, y, 100, 0.2, "lgbm",
                   run_name, experiment_details, metadata, mode)
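
# For reference only (an assumption, not the project's actual implementation):
# the two parameter formats above match what scikit-optimize's BayesSearchCV
# and scikit-learn's GridSearchCV expect, so run_experiment (see experiment.py)
# presumably wraps something along these lines, with 100 as the number of
# Bayesian iterations and 0.2 as the hold-out fraction:
#
#     from sklearn.model_selection import GridSearchCV, train_test_split
#     from skopt import BayesSearchCV
#
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
#     if mode == "bayesian":
#         search = BayesSearchCV(model, parameters, n_iter=100)
#     else:
#         search = GridSearchCV(model, parameters)
#     search.fit(X_train, y_train)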