# Third-party libraries used throughout the notebook.
import numpy as np
import pandas as pd
from google.colab import drive
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import ElasticNet, Lasso, LogisticRegression, Ridge
from sklearn.metrics import classification_report, confusion_matrix, r2_score
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.utils import resample

# Mount Google Drive at /content/drive so the dataset path used below
# resolves; force_remount=True is passed so the mount is redone on re-runs.
drive.mount('/content/drive', force_remount=True)
# ---------------------------------------------------------------------------
# Baseline: logistic regression on standardised raw features
# ---------------------------------------------------------------------------
path = '/content/drive/My Drive/Machine Learning Project/'

# Read the CSV and discard every row that has a missing value.
data = pd.read_csv(path + 'data.csv').dropna()

# 'Bankrupt?' is the target column; every other column is a feature.
y = data['Bankrupt?']
X = data.drop(columns=['Bankrupt?'])

# Hold out 20% of the rows, keeping the class ratio equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn mean/variance from the training rows only, then apply to both parts.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Plain logistic regression serves as the reference model.
baseline_model = LogisticRegression(max_iter=1000, random_state=42).fit(
    X_train_scaled, y_train
)

# Score the held-out rows and report per-class metrics.
y_pred_baseline = baseline_model.predict(X_test_scaled)
baseline_confusion = confusion_matrix(y_test, y_pred_baseline)
baseline_report = classification_report(y_test, y_pred_baseline)
print("Confusion Matrix (Baseline Logistic Regression):\n", baseline_confusion)
print("\nClassification Report (Baseline Logistic Regression):\n", baseline_report)
# ---------------------------------------------------------------------------
# Pairwise interaction features + L1-penalised logistic regression
# ---------------------------------------------------------------------------
data = data.dropna()  # Drop rows with missing values

# 'Bankrupt?' is the target column; every other column is a feature.
X = data.drop(columns=['Bankrupt?'])
y = data['Bankrupt?']

# Expand the features with degree-2 interaction terms (interaction_only=True,
# no bias column).
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_interaction = poly.fit_transform(X)

# Split BEFORE scaling so that test rows never influence the scaler's
# statistics, and stratify so the class ratio is preserved in both parts
# (same split arguments as the other experiments in this file).
X_interaction_train, X_interaction_test, y_train, y_test = train_test_split(
    X_interaction, y, test_size=0.2, stratify=y, random_state=42
)

# Fit the scaler on the training rows only, then apply it everywhere.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_interaction_train)
X_test = scaler.transform(X_interaction_test)

# Full scaled matrix, kept available under its original name; it is scaled
# with the training-only statistics learned above.
X_scaled = scaler.transform(X_interaction)

# The L1 penalty can drive coefficients to exactly zero, which matters with
# this many interaction terms; 'saga' is a solver that supports penalty='l1'.
model = LogisticRegression(max_iter=1000, penalty='l1', solver='saga', random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Pair every expanded feature name with its learned coefficient and list the
# ten most positive ones (sorted descending by signed value, not magnitude).
feature_names = poly.get_feature_names_out(input_features=X.columns)
coefficients = pd.DataFrame({'Feature': feature_names, 'Coefficient': model.coef_[0]})
coefficients = coefficients.sort_values(by='Coefficient', ascending=False)
print("\nTop Features with Coefficients:\n", coefficients.head(10))
# ---------------------------------------------------------------------------
# Cluster-based oversampling of the minority class + random forest
# ---------------------------------------------------------------------------
# Number of KMeans clusters the minority class is divided into. The same
# value drives the clustering, the per-cluster sample size and the loop.
n_minority_clusters = 5

# Stratified 80/20 split on the raw (unscaled) features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Separate the training rows by class (label 1 is treated as the minority).
majority_train = X_train[y_train == 0]
# Explicit copy: a column is added below, and writing into a slice of
# X_train would raise SettingWithCopyWarning in pandas versions before 3.0.
minority_train = X_train[y_train == 1].copy()

# Cluster the minority rows, then tag each row with its cluster label.
kmeans = KMeans(n_clusters=n_minority_clusters, random_state=42).fit(minority_train)
minority_train['Cluster'] = kmeans.labels_

# Oversample every cluster (with replacement) to an equal share of the
# majority-class size, so the combined minority roughly matches the majority.
samples_per_cluster = len(majority_train) // n_minority_clusters
resampled_minority = pd.concat([
    resample(
        minority_train[minority_train['Cluster'] == i].drop(columns=['Cluster']),
        replace=True,
        n_samples=samples_per_cluster,
        random_state=42,
    )
    for i in range(n_minority_clusters)
])

# Stack majority and resampled minority rows; the labels are built in the
# same order so they line up with the features row by row.
balanced_train = pd.concat([majority_train, resampled_minority])
X_balanced = balanced_train
y_balanced = pd.concat([
    pd.Series(0, index=majority_train.index),
    pd.Series(1, index=resampled_minority.index),
])

# Train on the balanced set, evaluate on the untouched (imbalanced) test set.
model = RandomForestClassifier(random_state=42)
model.fit(X_balanced, y_balanced)

y_pred = model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
# ---------------------------------------------------------------------------
# Feed-forward neural network on standardised raw features
# ---------------------------------------------------------------------------
import tensorflow as tf

# Seed TensorFlow so runs are repeatable, matching the random_state=42 used
# by every other model in this file.
tf.keras.utils.set_random_seed(42)

# Derive the split and the scaling here instead of reusing arrays left over
# from earlier experiments: X_train_scaled / y_train are reassigned several
# times above, and features and labels must come from the very same split.
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
nn_scaler = StandardScaler()
X_train_scaled = nn_scaler.fit_transform(X_train_nn)  # statistics from training rows only
X_test_scaled = nn_scaler.transform(X_test_nn)

# Two hidden ReLU layers followed by a single sigmoid unit that outputs the
# probability of the positive class.
nn_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),  # Binary classification
])

nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# validation_split=0.2 holds back a fifth of the training rows for validation.
nn_model.fit(X_train_scaled, y_train_nn, validation_split=0.2, epochs=20, batch_size=32, verbose=1)

# Accuracy alone can look good when one class dominates; the confusion
# matrix and per-class report below give the fuller picture.
loss, accuracy = nn_model.evaluate(X_test_scaled, y_test_nn)
print(f"Neural Network Test Accuracy: {accuracy:.2f}")

# Threshold the predicted probabilities at 0.5 and flatten the (n, 1) output
# to a 1-D label vector for the sklearn metrics.
y_pred_nn = (nn_model.predict(X_test_scaled) > 0.5).astype(int).ravel()
print("Confusion Matrix (Neural Network):\n", confusion_matrix(y_test_nn, y_pred_nn))
print("\nClassification Report (Neural Network):\n", classification_report(y_test_nn, y_pred_nn))