Task 3: Building a model for age estimation
Given the functional connectivity estimates derived above, machine learning methods can be used to predict an individual's age.
- Use different linear machine learning models to estimate age and calculate the prediction error.
- Use cross-validation to evaluate whether the models generalize to individuals whose data were not included in model building.
```python
import os
import gc
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt
from nilearn.image import resample_to_img
from nilearn.maskers import NiftiLabelsMasker
from nilearn.datasets import load_mni152_template
from nilearn.datasets import fetch_atlas_destrieux_2009
from nilearn.connectome import ConnectivityMeasure

# Load data
path = '/content/drive/MyDrive/fMRI/data'

def load_data(path):
    imgs = []
    names = []
    for root, dirs, files in os.walk(path):
        for filename in files:
            if 'nii' in filename:
                input_path = os.path.join(root, filename)
                img = nib.load(input_path)
                imgs.append(img)
                names.append(filename[0:8])
    return imgs, names

atlas_filename = fetch_atlas_destrieux_2009()['maps']
atlas_img = nib.load(atlas_filename)

# Load the MNI152 template
template = load_mni152_template(resolution=2)

# Create a masker to extract the ROI time series and apply some preprocessing
masker = NiftiLabelsMasker(atlas_img, standardize=True, detrend=True,
                           low_pass=0.1, high_pass=0.01, t_r=2)

# Functional connectivity
def calculate_FC(data_list, name_list):
    FC = {}
    for img, name in zip(data_list, name_list):
        print(name)
        # Resample into MNI152 space
        img = resample_to_img(img, template)
        # Extract the ROI time series
        roi_time_series = masker.fit_transform(img, confounds=None)
        # Compute functional connectivity
        correlation_measure = ConnectivityMeasure(kind='correlation')
        correlation_matrix = correlation_measure.fit_transform([roi_time_series])[0]
        # Threshold the correlation matrix
        threshold_matrix = np.where(np.abs(correlation_matrix) > 0.5, correlation_matrix, 0)
        FC[name] = threshold_matrix
        print(FC.keys())
        del img
        del correlation_measure
        del correlation_matrix
        del roi_time_series
        del threshold_matrix
        gc.collect()
    return FC

imgs, names = load_data(path)
fc = calculate_FC(imgs, names)
```
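As a quick sanity check before model building, one thresholded connectivity matrix can be visualized. This is a minimal sketch that assumes `fc` has been populated as above; the names `example_name` and `example_matrix` are illustrative, and `nilearn.plotting.plot_matrix` is used here only for display, not as part of the original pipeline.

```python
from nilearn import plotting
import matplotlib.pyplot as plt

# Pick one subject (assumes fc is the dictionary returned by calculate_FC)
example_name = sorted(fc.keys())[0]
example_matrix = fc[example_name]

# Display the thresholded ROI x ROI correlation matrix
plotting.plot_matrix(example_matrix, colorbar=True, vmax=1.0, vmin=-1.0,
                     title='Thresholded FC: ' + example_name)
plt.show()
```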
```python
# Load the Excel file containing subject age data
age = pd.read_excel('/content/drive/MyDrive/fMRI/data/sub_info_100_199.xlsx')

# Order the connectivity matrices by subject ID
my_ls = sorted(list(fc.keys()))
sorted_dict = {i: fc[i] for i in my_ls}

# Remove rows without fMRI data
for j in age['ID']:
    if j not in sorted_dict.keys():
        idx = age[age['ID'] == j].index[0]
        age.drop(idx, inplace=True)

# Sort the table by ID so its rows match the order of sorted_dict
# (keeps the features X and the target y aligned per subject)
age = age.sort_values('ID')

age['Sex'] = age['Sex'].astype(int)
#age['Age'] = age['Age'].astype(int)
print(len(age))
```
```python
import numpy as np

# Stack the preprocessed functional connectivity matrices into a feature array
X = np.array([sorted_dict[sample] for sample in sorted_dict])
# Flatten each subject's ROI x ROI matrix into a single feature vector
X = X.reshape(X.shape[0], X.shape[1] * X.shape[2])
y = np.array(age['Age'])
print(X.shape)
print(y.shape)
```
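Because each correlation matrix is symmetric, the flattened vector above contains every edge twice, plus the diagonal of ones. An optional alternative, sketched below under the assumption that `sorted_dict` holds square ROI x ROI matrices, keeps only the upper triangle as features, roughly halving the feature count without losing information; `X_triu`, `n_rois`, and `triu_idx` are illustrative names, not part of the original pipeline.

```python
import numpy as np

# Indices of the upper triangle, excluding the diagonal (assumes square matrices)
n_rois = next(iter(sorted_dict.values())).shape[0]
triu_idx = np.triu_indices(n_rois, k=1)

# One feature vector per subject, with each edge included only once
X_triu = np.array([sorted_dict[sample][triu_idx] for sample in sorted_dict])
print(X_triu.shape)
```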
```python
# Linear regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

# Choose a machine learning algorithm and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate the model on the held-out test data
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the performance metrics
print('Mean Squared Error:', mse)
print('Mean Absolute Error:', mae)
print('R-squared:', r2)
```
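With far more connectivity features than subjects, ordinary least squares can overfit badly. As one of the "different linear models" suggested above, a ridge-regularized regression can be tried on the same split. This is a sketch, not part of the original code; the regularization strength `alpha=1.0` is an arbitrary assumption and would normally be tuned (for example with `RidgeCV`).

```python
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, r2_score

# Ridge regression: a linear model with an L2 penalty for the p >> n setting
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)

y_pred_ridge = ridge.predict(X_test)
print('Ridge MAE:', mean_absolute_error(y_test, y_pred_ridge))
print('Ridge R-squared:', r2_score(y_test, y_pred_ridge))
```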
```python
# Linear regression + cross-validation
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score, KFold
import numpy as np

# Create a linear regression model
lr_model = LinearRegression()

# Define the cross-validation scheme
cv_method = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation and calculate the mean R2 score
cv_scores = cross_val_score(lr_model, X, y, cv=cv_method, scoring='r2')
mean_r2 = np.mean(cv_scores)

# Print the mean R2 score
print("Mean R2 score:", mean_r2)
```
```python
# Support vector regression
from sklearn.model_selection import KFold
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
import numpy as np

# Define the SVR model
model = SVR(kernel='rbf', C=10, gamma='scale')

# Define the cross-validation iterator
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation and evaluate the model
mae_scores = []
for train_idx, test_idx in cv.split(X):
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae_scores.append(mean_absolute_error(y_test, y_pred))

print("Mean absolute error: {:.2f} years".format(np.mean(mae_scores)))
```