Skip to content

Task 3: Building model for age estimation

elhava edited this page May 29, 2023 · 3 revisions

Given the functional connectivity estimates derived above, machine learning methods can be used to predict an individual's age.

  • Use different machine learning models (for example, linear models) to estimate age. Calculate the prediction error.
  • Use cross-validation to evaluate whether the model generalizes to test individuals whose data were not included in model building.
import os
import gc
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt
from nilearn.image import resample_to_img
from nilearn.maskers import NiftiLabelsMasker
from nilearn.datasets import load_mni152_template
from nilearn.datasets import fetch_atlas_destrieux_2009
from nilearn.connectome import ConnectivityMeasure


# Load data

# Root directory that load_data() searches recursively for NIfTI images.
# NOTE(review): looks like a Google Drive folder mounted in Colab - confirm
# before running elsewhere.
path = '/content/drive/MyDrive/fMRI/data'

def load_data(path):
    """Recursively load every NIfTI image found under ``path``.

    Args:
        path: Root directory to search.

    Returns:
        A tuple ``(imgs, names)`` of two parallel lists: the objects returned
        by ``nib.load`` and, for each one, the first eight characters of its
        file name (used later as the key for that image).
    """
    imgs = []
    names = []
    for root, dirs, files in os.walk(path):
        # os.walk yields entries in arbitrary order; sorting ``dirs`` in place
        # and iterating the files sorted makes the output order reproducible.
        dirs.sort()
        for filename in sorted(files):
            # Match on the extension: a bare substring test would also pick up
            # unrelated files that merely contain "nii" somewhere in the name.
            if not filename.endswith(('.nii', '.nii.gz')):
                continue
            imgs.append(nib.load(os.path.join(root, filename)))
            names.append(filename[0:8])

    return imgs, names


# Destrieux (2009) parcellation: its label image defines the ROIs.
atlas_filename = fetch_atlas_destrieux_2009()['maps']
atlas_img = nib.load(atlas_filename)

# MNI152 template that the functional images are resampled to.
template = load_mni152_template(resolution=2)

# Masker that extracts one time series per atlas label. Signals are
# standardized, detrended and band-pass filtered (0.01-0.1, with t_r=2).
masker = NiftiLabelsMasker(
    atlas_img,
    standardize=True,
    detrend=True,
    high_pass=0.01,
    low_pass=0.1,
    t_r=2,
)

#functional connectivity


def calculate_FC(data_list, name_list, threshold=0.5):
    """Compute a thresholded functional-connectivity matrix for each image.

    Every image is resampled to the module-level ``template``, reduced to ROI
    time series with the module-level ``masker`` and turned into a correlation
    matrix. Entries whose absolute value does not exceed ``threshold`` are
    set to 0.

    Args:
        data_list: Images to process.
        name_list: One name per image, in the same order as ``data_list``.
        threshold: Absolute-correlation cut-off. Defaults to 0.5.

    Returns:
        Dict mapping each name to its thresholded correlation matrix. If a
        name occurs more than once, the last image with that name wins.

    Raises:
        ValueError: If ``data_list`` and ``name_list`` differ in length.
    """
    # zip() would silently drop the surplus entries of the longer list, so a
    # mismatch is reported instead of producing an incomplete result.
    if len(data_list) != len(name_list):
        raise ValueError(
            'data_list and name_list must have the same length '
            '({} != {})'.format(len(data_list), len(name_list))
        )

    FC = {}
    for img, name in zip(data_list, name_list):
        # Progress indicator: one line per processed image
        print(name)

        # Resample into MNI152 space
        img = resample_to_img(img, template)

        # Extract the ROI time series data
        roi_time_series = masker.fit_transform(img, confounds=None)

        # Compute functional connectivity
        correlation_measure = ConnectivityMeasure(kind='correlation')
        correlation_matrix = correlation_measure.fit_transform([roi_time_series])[0]

        # Keep only correlations stronger than the cut-off, zero the rest
        FC[name] = np.where(
            np.abs(correlation_matrix) > threshold, correlation_matrix, 0
        )

        # Release the per-image intermediates before the next image is
        # processed to keep peak memory down.
        del img, roi_time_series, correlation_measure, correlation_matrix
        gc.collect()

    return FC


imgs, names = load_data(path)

fc = calculate_FC(imgs, names)

# Load the Excel sheet with the per-subject information
# (the 'ID', 'Sex' and 'Age' columns are used below).
age = pd.read_excel('/content/drive/MyDrive/fMRI/data/sub_info_100_199.xlsx')

# Connectivity matrices keyed by subject name, in sorted key order
my_ls = sorted(fc)
sorted_dict = {name: fc[name] for name in my_ls}

# Remove rows without fMRI data. A single boolean mask keeps exactly the rows
# whose ID has a connectivity matrix; the previous row-by-row in-place drop
# modified the frame while one of its columns was being iterated.
# .copy() gives an independent frame so the column assignment below is safe.
age = age[age['ID'].isin(my_ls)].copy()
age['Sex'] = age['Sex'].astype(int)
# Number of subjects that have both fMRI data and table entries
# (displayed as the cell output when run in a notebook).
len(age)

Some simple machine learning code for fMRI-based age estimation

import numpy as np

# Build the feature matrix in the row order of `age`, so that X[i] and y[i]
# always belong to the same subject. Iterating over `sorted_dict` instead
# would order X by subject name while y follows the spreadsheet rows, which
# misaligns features and labels whenever the sheet is not sorted by ID or a
# scanned subject is missing from it.
X = np.array([sorted_dict[subject_id] for subject_id in age['ID']])
# Flatten each connectivity matrix into one feature vector per subject
X = X.reshape(X.shape[0], X.shape[1]*X.shape[2])
y = np.array(age['Age'])
print(X.shape)
print(y.shape)

# Linear regression evaluated on a single hold-out split

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=32
)

# Fit the linear model on the training portion only
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the held-out samples and score the predictions
y_pred = model.predict(X_test)
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# Report the performance metrics
print('Mean Squared Error:', mse)
print('Mean Absolute Error:', mae)
print('R-squared:', r2)

# Linear regression evaluated with 5-fold cross-validation

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_score

# Features (X) and target (y) are the arrays built earlier in this script.

# Shuffled 5-fold split with a fixed seed so the folds are reproducible
cv_method = KFold(n_splits=5, shuffle=True, random_state=42)

# Estimator that is re-fitted on every training fold
lr_model = LinearRegression()

# One R2 score per fold, then their average
cv_scores = cross_val_score(lr_model, X, y, scoring='r2', cv=cv_method)
mean_r2 = cv_scores.mean()

# Report the mean R2 score
print("Mean R2 score:", mean_r2)


# Support vector regression scored by 5-fold cross-validated MAE

import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
from sklearn.svm import SVR

# RBF-kernel SVR and the shuffled, seeded 5-fold splitter
model = SVR(kernel='rbf', C=10, gamma='scale')
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Fit on each training fold, predict its test fold and collect the fold MAE
mae_scores = []
for train_idx, test_idx in cv.split(X):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    y_pred = model.fit(X_train, y_train).predict(X_test)
    mae_scores.append(mean_absolute_error(y_test, y_pred))

print("Mean absolute error: {:.2f} years".format(np.mean(mae_scores)))

Clone this wiki locally