diff --git a/Part 1 - Data Preprocessing/data_preprocessing.py b/Part 1 - Data Preprocessing/data_preprocessing.py index 96e8bdd..251862d 100644 --- a/Part 1 - Data Preprocessing/data_preprocessing.py +++ b/Part 1 - Data Preprocessing/data_preprocessing.py @@ -1,5 +1,3 @@ -# Data Preprocessing Template - # Importing the libraries import numpy as np import matplotlib.pyplot as plt diff --git a/Part 2 - Regression/Section 5 - Multiple Linear Regression/Multiple_linear_regression.py b/Part 2 - Regression/Section 5 - Multiple Linear Regression/Multiple_linear_regression.py index b467d1a..9a18c08 100644 --- a/Part 2 - Regression/Section 5 - Multiple Linear Regression/Multiple_linear_regression.py +++ b/Part 2 - Regression/Section 5 - Multiple Linear Regression/Multiple_linear_regression.py @@ -7,7 +7,6 @@ X = dataset.iloc[:, :-1].values y = dataset.iloc[:, 4].values - from sklearn.preprocessing import LabelEncoder, OneHotEncoder labelencoder_X = LabelEncoder() X[:, 3] = labelencoder_X.fit_transform(X[:, 3]) @@ -32,7 +31,10 @@ X = np.append(arr=np.ones((50,1)).astype(int), values = X, axis=1) X_opt = X[:, [0,1,2,3,4,5]] regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit() -regressor_OLS.summary() +p_values = regressor_OLS.pvalues +#p_values = p_values.numpy() +max_p = np.amax(p_values) + X_opt = X[:, [0,1,3,4,5]] regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit() diff --git a/Part 2 - Regression/Section 6 - Polynomial Regression/Position_Salaries.csv b/Part 2 - Regression/Section 6 - Polynomial Regression/Position_Salaries.csv new file mode 100644 index 0000000..76d9d3e --- /dev/null +++ b/Part 2 - Regression/Section 6 - Polynomial Regression/Position_Salaries.csv @@ -0,0 +1,11 @@ +Position,Level,Salary +Business Analyst,1,45000 +Junior Consultant,2,50000 +Senior Consultant,3,60000 +Manager,4,80000 +Country Manager,5,110000 +Region Manager,6,150000 +Partner,7,200000 +Senior Partner,8,300000 +C-level,9,500000 +CEO,10,1000000 \ No newline at end of file diff --git 
a/Part 2 - Regression/Section 6 - Polynomial Regression/polynomial_linear_regression.py b/Part 2 - Regression/Section 6 - Polynomial Regression/polynomial_linear_regression.py
new file mode 100644
index 0000000..744adf7
--- /dev/null
+++ b/Part 2 - Regression/Section 6 - Polynomial Regression/polynomial_linear_regression.py
@@ -0,0 +1,56 @@
+"""Compare linear and degree-4 polynomial regression on Position_Salaries.csv."""
+
+# Importing the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import PolynomialFeatures
+
+# Importing the dataset
+dataset = pd.read_csv('Position_Salaries.csv')
+# Slice 1:2 (not a plain 1) so X stays a 2-D (n_samples, 1) matrix
+X = dataset.iloc[:, 1:2].values
+y = dataset.iloc[:, 2].values
+
+# Fitting Linear Regression to the dataset
+lin_reg = LinearRegression()
+lin_reg.fit(X, y)
+
+# Fitting Polynomial Regression to the dataset
+poly_reg = PolynomialFeatures(degree=4)
+X_poly = poly_reg.fit_transform(X)  # expand x into columns 1, x, x^2, x^3, x^4
+lin_reg_2 = LinearRegression()
+lin_reg_2.fit(X_poly, y)
+
+# Visualizing Linear Regression
+plt.scatter(X, y, color='red')
+plt.plot(X, lin_reg.predict(X), color='blue')
+plt.title('Truth or Bluff(Linear Regression)')
+plt.xlabel('Position Level')
+plt.ylabel('Salary')
+plt.show()
+
+# Visualizing Polynomial Regression on a finer 0.1-step grid for a smoother curve
+# Use ndarray.min()/max(): the builtin min()/max() iterate the rows of the
+# 2-D array and would hand np.arange one-element arrays instead of scalars.
+X_grid = np.arange(X.min(), X.max(), 0.1).reshape(-1, 1)
+plt.scatter(X, y, color='red')
+# transform (not fit_transform): poly_reg was already fitted on X above
+plt.plot(X_grid, lin_reg_2.predict(poly_reg.transform(X_grid)), color='blue')
+plt.title('Truth or Bluff(Polynomial Regression)')
+plt.xlabel('Position Level')
+plt.ylabel('Salary')
+plt.show()
+
+# scikit-learn estimators expect a 2-D (n_samples, n_features) input, so the
+# single query level 6.5 is passed as [[6.5]] rather than a bare scalar.
+new_level = [[6.5]]
+
+# Predicting new result with Linear Regression
+linear_prediction = lin_reg.predict(new_level)
+print(linear_prediction)
+
+# Predicting new result with Polynomial Regression
+polynomial_prediction = lin_reg_2.predict(poly_reg.transform(new_level))
+print(polynomial_prediction)
\ No newline at end of file