-
Notifications
You must be signed in to change notification settings - Fork 1
/
Housing.py
54 lines (50 loc) · 2.14 KB
/
Housing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import cross_val_score
from joblib import dump, load
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
housing = pd.read_csv("data.csv")
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, test_index in split.split(housing, housing['CHAS']):
strat_train_set = housing.loc[train_index]
strat_test_set = housing.loc[test_index]
housing = strat_train_set.copy()
my_pipeline = Pipeline([
('imputer', SimpleImputer(strategy="median")),
# ..... add as many as you want in your pipeline
('std_scaler', StandardScaler()),
])
housing = strat_train_set.drop("MEDV", axis=1)
housing_labels = strat_train_set["MEDV"].copy()
housing_num_tr = my_pipeline.fit_transform(housing)
some_data = housing.iloc[:5]
some_labels = housing_labels.iloc[:5]
prepared_data = my_pipeline.transform(some_data)
model = RandomForestRegressor()
model.fit(housing_num_tr, housing_labels)
model.predict(prepared_data)
housing_predictions = model.predict(housing_num_tr)
mse = mean_squared_error(housing_labels, housing_predictions)
rmse = np.sqrt(mse)
scores = cross_val_score(model, housing_num_tr, housing_labels, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
dump(model, 'Dragon.joblib')
X_test = strat_test_set.drop("MEDV", axis=1)
Y_test = strat_test_set["MEDV"].copy()
X_test_prepared = my_pipeline.transform(X_test)
final_predictions = model.predict(X_test_prepared)
final_mse = mean_squared_error(Y_test, final_predictions)
final_rmse = np.sqrt(final_mse)
# print(final_predictions, list(Y_test))
from joblib import dump, load
import numpy as np
model = load('Dragon.joblib')
features = np.array([[-5.43942006, 4.12628155, -1.6165014, -0.67288841, -1.42262747,
-11.44443979304, -49.31238772, 7.61111401, -26.0016879 , -0.5778192 ,
-0.97491834, 0.41164221, -66.86091034]])
print(model.predict(features))