-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 6ce25d1
Showing
21 changed files
with
462 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
Country,Age,Salary,Purchased | ||
France,44,72000,No | ||
Spain,27,48000,Yes | ||
Germany,30,54000,No | ||
Spain,38,61000,No | ||
Germany,40,,Yes | ||
France,35,58000,Yes | ||
Spain,,52000,No | ||
France,48,79000,Yes | ||
Germany,50,83000,No | ||
France,37,67000,Yes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Data Preprocessing | ||
|
||
# Importing the dataset | ||
dataset = read.csv('Data.csv') | ||
|
||
# Taking care of missing data | ||
dataset$Age = ifelse(is.na(dataset$Age), | ||
ave(dataset$Age, FUN = function(x) mean(x, na.rm = TRUE)), | ||
dataset$Age) | ||
dataset$Salary = ifelse(is.na(dataset$Salary), | ||
ave(dataset$Salary, FUN = function(x) mean(x, na.rm = TRUE)), | ||
dataset$Salary) | ||
|
||
# Encoding categorical data | ||
dataset$Country = factor(dataset$Country, | ||
levels = c('France', 'Spain', 'Germany'), | ||
labels = c(1, 2, 3)) | ||
dataset$Purchased = factor(dataset$Purchased, | ||
levels = c('No', 'Yes'), | ||
labels = c(0, 1)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Data Preprocessing | ||
|
||
# Importing the libraries | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
|
||
# Importing the dataset | ||
dataset = pd.read_csv('Data.csv') | ||
X = dataset.iloc[:, :-1].values | ||
y = dataset.iloc[:, 3].values | ||
|
||
# Taking care of missing data | ||
from sklearn.preprocessing import Imputer | ||
imputer = Imputer(missing_values = 'NaN', strategy = 'mean', axis = 0) | ||
imputer = imputer.fit(X[:, 1:3]) | ||
X[:, 1:3] = imputer.transform(X[:, 1:3]) | ||
|
||
# Encoding categorical data | ||
# Encoding the Independent Variable | ||
from sklearn.preprocessing import LabelEncoder, OneHotEncoder | ||
labelencoder_X = LabelEncoder() | ||
X[:, 0] = labelencoder_X.fit_transform(X[:, 0]) | ||
onehotencoder = OneHotEncoder(categorical_features = [0]) | ||
X = onehotencoder.fit_transform(X).toarray() | ||
# Encoding the Dependent Variable | ||
labelencoder_y = LabelEncoder() | ||
y = labelencoder_y.fit_transform(y) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Data Preprocessing Template | ||
|
||
# Importing the libraries | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
|
||
# Importing the dataset | ||
dataset = pd.read_csv('Data.csv') | ||
X = dataset.iloc[:, :-1].values | ||
y = dataset.iloc[:, 3].values | ||
|
||
#Take care of missing data | ||
from sklearn.preprocessing import Imputer | ||
imputer=Imputer(missing_values="NaN", strategy="mean", axis=0) | ||
imputer = imputer.fit(X[:, 1:3]) | ||
X[:,1:3]=imputer.transform(X[:,1:3]) | ||
|
||
|
||
#Encoding Categorical data | ||
from sklearn.preprocessing import OneHotEncoder, LabelEncoder | ||
labelencoder_x = LabelEncoder() | ||
X[:,0] = labelencoder_x.fit_transform(X[:,0]) | ||
onehotencoder = OneHotEncoder(categorical_features=[0]) | ||
X = onehotencoder.fit_transform(X).toarray() | ||
|
||
labelencoder_y = LabelEncoder() | ||
y = labelencoder_y.fit_transform(y) | ||
|
||
# Splitting the dataset into the Training set and Test set | ||
from sklearn.model_selection import train_test_split | ||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) | ||
|
||
# Feature Scaling | ||
from sklearn.preprocessing import StandardScaler | ||
sc_X = StandardScaler() | ||
X_train = sc_X.fit_transform(X_train) | ||
X_test = sc_X.transform(X_test) | ||
sc_y = StandardScaler() | ||
y_train = sc_y.fit_transform(y_train) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Data Preprocessing Template | ||
|
||
# Importing the dataset | ||
dataset = read.csv('Data.csv') | ||
|
||
# Splitting the dataset into the Training set and Test set | ||
# install.packages('caTools') | ||
library(caTools) | ||
set.seed(123) | ||
split = sample.split(dataset$DependentVariable, SplitRatio = 0.8) | ||
training_set = subset(dataset, split == TRUE) | ||
test_set = subset(dataset, split == FALSE) | ||
|
||
# Feature Scaling | ||
# training_set = scale(training_set) | ||
# test_set = scale(test_set) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Data Preprocessing | ||
|
||
# Importing the dataset | ||
dataset = read.csv('Data.csv') | ||
|
||
# Taking care of missing data | ||
dataset$Age = ifelse(is.na(dataset$Age), | ||
ave(dataset$Age, FUN = function(x) mean(x, na.rm = TRUE)), | ||
dataset$Age) | ||
dataset$Salary = ifelse(is.na(dataset$Salary), | ||
ave(dataset$Salary, FUN = function(x) mean(x, na.rm = TRUE)), | ||
dataset$Salary) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Data Preprocessing | ||
|
||
# Importing the libraries | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
|
||
# Importing the dataset | ||
dataset = pd.read_csv('Data.csv') | ||
X = dataset.iloc[:, :-1].values | ||
y = dataset.iloc[:, 3].values | ||
|
||
# Taking care of missing data | ||
from sklearn.preprocessing import Imputer | ||
imputer = Imputer(missing_values = 'NaN', strategy = 'mean', axis = 0) | ||
imputer = imputer.fit(X[:, 1:3]) | ||
X[:, 1:3] = imputer.transform(X[:, 1:3]) |
31 changes: 31 additions & 0 deletions
31
Part 2 - Regression/Section 4 - Simple Linear Regression/Salary_Data.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
YearsExperience,Salary | ||
1.1,39343.00 | ||
1.3,46205.00 | ||
1.5,37731.00 | ||
2.0,43525.00 | ||
2.2,39891.00 | ||
2.9,56642.00 | ||
3.0,60150.00 | ||
3.2,54445.00 | ||
3.2,64445.00 | ||
3.7,57189.00 | ||
3.9,63218.00 | ||
4.0,55794.00 | ||
4.0,56957.00 | ||
4.1,57081.00 | ||
4.5,61111.00 | ||
4.9,67938.00 | ||
5.1,66029.00 | ||
5.3,83088.00 | ||
5.9,81363.00 | ||
6.0,93940.00 | ||
6.8,91738.00 | ||
7.1,98273.00 | ||
7.9,101302.00 | ||
8.2,113812.00 | ||
8.7,109431.00 | ||
9.0,105582.00 | ||
9.5,116969.00 | ||
9.6,112635.00 | ||
10.3,122391.00 | ||
10.5,121872.00 |
28 changes: 28 additions & 0 deletions
28
Part 2 - Regression/Section 4 - Simple Linear Regression/simple_linear_regression.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#Simple Linear Regression | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
|
||
# Importing the dataset | ||
dataset = pd.read_csv('Salary_Data.csv') | ||
X = dataset.iloc[:, :-1].values | ||
y = dataset.iloc[:, 1].values | ||
|
||
from sklearn.model_selection import train_test_split | ||
X_train,X_test, y_train, y_test=train_test_split(X,y,test_size=1/3, random_state=0) | ||
|
||
#Fitting Simple Linear Regression to the Training set | ||
from sklearn.linear_model import LinearRegression | ||
regressor = LinearRegression() | ||
regressor.fit(X_train, y_train) | ||
|
||
#Predicting the results | ||
y_pred = regressor.predict(X_test) | ||
|
||
#Visualizing the predictions | ||
plt.scatter(X_train,y_train, color='red') | ||
plt.plot(X_train, regressor.predict(X_train), color='blue') | ||
plt.title('Salary vs Experience (Training set)') | ||
plt.xlabel('Years of Experience') | ||
plt.ylabel('Salary') | ||
plt.show() |
51 changes: 51 additions & 0 deletions
51
Part 2 - Regression/Section 5 - Multiple Linear Regression/50_Startups.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
R&D Spend,Administration,Marketing Spend,State,Profit | ||
165349.2,136897.8,471784.1,New York,192261.83 | ||
162597.7,151377.59,443898.53,California,191792.06 | ||
153441.51,101145.55,407934.54,Florida,191050.39 | ||
144372.41,118671.85,383199.62,New York,182901.99 | ||
142107.34,91391.77,366168.42,Florida,166187.94 | ||
131876.9,99814.71,362861.36,New York,156991.12 | ||
134615.46,147198.87,127716.82,California,156122.51 | ||
130298.13,145530.06,323876.68,Florida,155752.6 | ||
120542.52,148718.95,311613.29,New York,152211.77 | ||
123334.88,108679.17,304981.62,California,149759.96 | ||
101913.08,110594.11,229160.95,Florida,146121.95 | ||
100671.96,91790.61,249744.55,California,144259.4 | ||
93863.75,127320.38,249839.44,Florida,141585.52 | ||
91992.39,135495.07,252664.93,California,134307.35 | ||
119943.24,156547.42,256512.92,Florida,132602.65 | ||
114523.61,122616.84,261776.23,New York,129917.04 | ||
78013.11,121597.55,264346.06,California,126992.93 | ||
94657.16,145077.58,282574.31,New York,125370.37 | ||
91749.16,114175.79,294919.57,Florida,124266.9 | ||
86419.7,153514.11,0,New York,122776.86 | ||
76253.86,113867.3,298664.47,California,118474.03 | ||
78389.47,153773.43,299737.29,New York,111313.02 | ||
73994.56,122782.75,303319.26,Florida,110352.25 | ||
67532.53,105751.03,304768.73,Florida,108733.99 | ||
77044.01,99281.34,140574.81,New York,108552.04 | ||
64664.71,139553.16,137962.62,California,107404.34 | ||
75328.87,144135.98,134050.07,Florida,105733.54 | ||
72107.6,127864.55,353183.81,New York,105008.31 | ||
66051.52,182645.56,118148.2,Florida,103282.38 | ||
65605.48,153032.06,107138.38,New York,101004.64 | ||
61994.48,115641.28,91131.24,Florida,99937.59 | ||
61136.38,152701.92,88218.23,New York,97483.56 | ||
63408.86,129219.61,46085.25,California,97427.84 | ||
55493.95,103057.49,214634.81,Florida,96778.92 | ||
46426.07,157693.92,210797.67,California,96712.8 | ||
46014.02,85047.44,205517.64,New York,96479.51 | ||
28663.76,127056.21,201126.82,Florida,90708.19 | ||
44069.95,51283.14,197029.42,California,89949.14 | ||
20229.59,65947.93,185265.1,New York,81229.06 | ||
38558.51,82982.09,174999.3,California,81005.76 | ||
28754.33,118546.05,172795.67,California,78239.91 | ||
27892.92,84710.77,164470.71,Florida,77798.83 | ||
23640.93,96189.63,148001.11,California,71498.49 | ||
15505.73,127382.3,35534.17,New York,69758.98 | ||
22177.74,154806.14,28334.72,California,65200.33 | ||
1000.23,124153.04,1903.93,New York,64926.08 | ||
1315.46,115816.21,297114.46,Florida,49490.75 | ||
0,135426.92,0,California,42559.73 | ||
542.05,51743.15,0,New York,35673.41 | ||
0,116983.8,45173.06,California,14681.4 |
Binary file added
BIN
+5.37 KB
Part 2 - Regression/Section 5 - Multiple Linear Regression/Multiple_Linear_Regression.zip
Binary file not shown.
Binary file added
BIN
+6 KB
...ultiple Linear Regression/Multiple_Linear_Regression/Multiple_Linear_Regression/.DS_Store
Binary file not shown.
16 changes: 16 additions & 0 deletions
16
...ssion/Multiple_Linear_Regression/Multiple_Linear_Regression/data_preprocessing_template.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Data Preprocessing Template | ||
|
||
# Importing the dataset | ||
dataset = read.csv('Data.csv') | ||
|
||
# Splitting the dataset into the Training set and Test set | ||
# install.packages('caTools') | ||
library(caTools) | ||
set.seed(123) | ||
split = sample.split(dataset$DependentVariable, SplitRatio = 0.8) | ||
training_set = subset(dataset, split == TRUE) | ||
test_set = subset(dataset, split == FALSE) | ||
|
||
# Feature Scaling | ||
# training_set = scale(training_set) | ||
# test_set = scale(test_set) |
23 changes: 23 additions & 0 deletions
23
...sion/Multiple_Linear_Regression/Multiple_Linear_Regression/data_preprocessing_template.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Data Preprocessing Template | ||
|
||
# Importing the libraries | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
|
||
# Importing the dataset | ||
dataset = pd.read_csv('Data.csv') | ||
X = dataset.iloc[:, :-1].values | ||
y = dataset.iloc[:, 3].values | ||
|
||
# Splitting the dataset into the Training set and Test set | ||
from sklearn.cross_validation import train_test_split | ||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) | ||
|
||
# Feature Scaling | ||
"""from sklearn.preprocessing import StandardScaler | ||
sc_X = StandardScaler() | ||
X_train = sc_X.fit_transform(X_train) | ||
X_test = sc_X.transform(X_test) | ||
sc_y = StandardScaler() | ||
y_train = sc_y.fit_transform(y_train)""" |
28 changes: 28 additions & 0 deletions
28
...ession/Multiple_Linear_Regression/Multiple_Linear_Regression/multiple_linear_regression.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Multiple Linear Regression | ||
|
||
# Importing the dataset | ||
dataset = read.csv('50_Startups.csv') | ||
|
||
# Encoding categorical data | ||
dataset$State = factor(dataset$State, | ||
levels = c('New York', 'California', 'Florida'), | ||
labels = c(1, 2, 3)) | ||
|
||
# Splitting the dataset into the Training set and Test set | ||
# install.packages('caTools') | ||
library(caTools) | ||
set.seed(123) | ||
split = sample.split(dataset$Profit, SplitRatio = 0.8) | ||
training_set = subset(dataset, split == TRUE) | ||
test_set = subset(dataset, split == FALSE) | ||
|
||
# Feature Scaling | ||
# training_set = scale(training_set) | ||
# test_set = scale(test_set) | ||
|
||
# Fitting Multiple Linear Regression to the Training set | ||
regressor = lm(formula = Profit ~ ., | ||
data = training_set) | ||
|
||
# Predicting the Test set results | ||
y_pred = predict(regressor, newdata = test_set) |
41 changes: 41 additions & 0 deletions
41
...ssion/Multiple_Linear_Regression/Multiple_Linear_Regression/multiple_linear_regression.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Multiple Linear Regression | ||
|
||
# Importing the libraries | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
|
||
# Importing the dataset | ||
dataset = pd.read_csv('50_Startups.csv') | ||
X = dataset.iloc[:, :-1].values | ||
y = dataset.iloc[:, 4].values | ||
|
||
# Encoding categorical data | ||
from sklearn.preprocessing import LabelEncoder, OneHotEncoder | ||
labelencoder = LabelEncoder() | ||
X[:, 3] = labelencoder.fit_transform(X[:, 3]) | ||
onehotencoder = OneHotEncoder(categorical_features = [3]) | ||
X = onehotencoder.fit_transform(X).toarray() | ||
|
||
# Avoiding the Dummy Variable Trap | ||
X = X[:, 1:] | ||
|
||
# Splitting the dataset into the Training set and Test set | ||
from sklearn.cross_validation import train_test_split | ||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) | ||
|
||
# Feature Scaling | ||
"""from sklearn.preprocessing import StandardScaler | ||
sc_X = StandardScaler() | ||
X_train = sc_X.fit_transform(X_train) | ||
X_test = sc_X.transform(X_test) | ||
sc_y = StandardScaler() | ||
y_train = sc_y.fit_transform(y_train)""" | ||
|
||
# Fitting Multiple Linear Regression to the Training set | ||
from sklearn.linear_model import LinearRegression | ||
regressor = LinearRegression() | ||
regressor.fit(X_train, y_train) | ||
|
||
# Predicting the Test set results | ||
y_pred = regressor.predict(X_test) |
Binary file added
BIN
+120 Bytes
...ear Regression/Multiple_Linear_Regression/__MACOSX/Multiple_Linear_Regression/._.DS_Store
Binary file not shown.
Binary file added
BIN
+576 Bytes
...gression/Multiple_Linear_Regression/__MACOSX/Multiple_Linear_Regression/._50_Startups.csv
Binary file not shown.
Oops, something went wrong.