# -*- coding: utf-8 -*-
"""
Train and evaluate a random-forest classifier on iPhone purchase records.

The script reads ``iphone_purchase_records.csv`` from the current working
directory, label-encodes the first column (gender), holds out 25% of the rows
as a test set, fits a 100-tree random forest using the entropy criterion, and
prints the confusion matrix, accuracy, precision and recall measured on the
held-out rows.
"""

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Step 1 - Load Data
# Every column except the last is a feature; the last column is the target.
# The target is addressed as -1 (rather than a hard-coded index) so that it
# always agrees with the "all but last" slice used for the features.
dataset = pd.read_csv("iphone_purchase_records.csv")
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Step 2 - Convert Gender to number
# The encoder is kept in a variable so the same mapping can be re-applied
# (or inverted) later.
labelEncoder_gender = LabelEncoder()
X[:, 0] = labelEncoder_gender.fit_transform(X[:, 0])

# Optional - convert X to a float array
# `np.float` was deprecated in NumPy 1.20 and removed in 1.24; np.float64 is
# the dtype that alias used to resolve to.
X = X.astype(np.float64)

# Step 3 - Split Data
# Fixed random_state keeps the train/test partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Step 4 - Fit Classifier
classifier = RandomForestClassifier(
    n_estimators=100, criterion="entropy", random_state=0
)
classifier.fit(X_train, y_train)

# Step 5 - Predict
y_pred = classifier.predict(X_test)

# Step 6 - Evaluate the model performance
# NOTE(review): precision/recall are computed with scikit-learn's defaults,
# which treat label 1 as the positive class - assumes y is binary 0/1;
# confirm against the CSV.
# Reference values noted by the original author for an earlier run (not
# re-verified): confusion-matrix errors 5 and 3, accuracy 0.92,
# precision 0.85, recall 0.90.
cm = metrics.confusion_matrix(y_test, y_pred)
print(cm)
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy score:", accuracy)
precision = metrics.precision_score(y_test, y_pred)
print("Precision score:", precision)
recall = metrics.recall_score(y_test, y_pred)
print("Recall score:", recall)