diff --git a/recognition/README.md b/recognition/README.md
index 5c646231c2..019afc487d 100644
--- a/recognition/README.md
+++ b/recognition/README.md
@@ -1,10 +1,39 @@
-# Recognition Tasks
-Various recognition tasks solved in deep learning frameworks.
-
-Tasks may include:
-* Image Segmentation
-* Object detection
-* Graph node classification
-* Image super resolution
-* Disease classification
-* Generative modelling with StyleGAN and Stable Diffusion
+## Lesion Segmentation of ISIC 2017 with Improved UNet
+
+### Table of Contents
+ - [ISIC 2017 Lesion Datasets](#ISIC-2017-Lesion-Datasets)
+ - [The Improved UNET Architecture](#The-Improved-UNET-Architecture)
+ - [Training and Testing](#Training-and-Testing)
+ - [Results](#Results)
+
+
+### ISIC 2017 Lesion Datasets
+The [ISIC 2017](https://challenge.isic-archive.com/data/#2017) dataset was used to build the model and perform segmentation. The dataset provides training, validation and test splits, but only a subset of each split was used because of limits on training time, as shown below.
+
+- Training set: 200
+- Validation set: 150
+- Test set: 9
+
+The dataset consists of JPG images of skin lesions along with PNG segmentation masks, which serve as the ground truth for computing and back-propagating losses.
+
+Each image was processed by first resizing it to 256 x 256 pixels and then applying augmentations such as random rotation and flipping, which make the model invariant to these trivial transformations.
+
+### The Improved UNET Architecture
+The Improved UNET architecture used is the one proposed in [Brain Tumor Segmentation and Radiomics Survival Prediction: Contribution to the BRATS 2017 Challenge](https://arxiv.org/pdf/1802.10508v1.pdf). Originally used for 3D image segmentation on the BRATS 2017 dataset, the Improved UNET architecture was adapted here to suit 2D segmentation of skin lesions in the ISIC dataset.
+
+The network consists of an encoder and a decoder. The encoder is composed of convolutional layers with an increasing number of filters, each followed by a ContextBlock that applies a residual connection and dropout regularization. The decoder is composed of upsampling layers that increase the spatial resolution of the feature maps, each followed by a LocalisationBlock that combines feature maps from the encoder and decoder paths. The output of the model is a tensor with the same spatial size as the input image, in which each pixel is classified as lesion or background. UNet-style architectures are known for handling small datasets well and producing accurate segmentations.
+
+### Training and Testing
+Training and validation were run for 24 epochs on the sample of images described above, and the model demonstrated the required Dice Similarity Coefficient (DSC) of greater than 0.8 by the end of training; refer to Fig. 1 and 2 below. The DSC used for the loss and the evaluation is sketched below.
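+
+As a reference, the coefficient can be computed as follows. This is a minimal sketch that mirrors the `DiceLoss` defined in `train.py` (which optimises `1 - DSC`); the function name and smoothing constant shown here are illustrative.
+
+```python
+import torch
+
+def dice_coefficient(pred: torch.Tensor, target: torch.Tensor, smooth: float = 1e-4) -> torch.Tensor:
+    # Flatten the predicted and ground-truth masks, then measure their overlap.
+    pred, target = pred.reshape(-1), target.reshape(-1)
+    intersection = (pred * target).sum()
+    return (2.0 * intersection + smooth) / (pred.sum() + target.sum() + smooth)
+```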

+### Results
+
+Fig. 1: Training and validation loss, demonstrating that the model achieves a Dice Similarity Coefficient of >0.8 for all test images.
+
+Fig. 2: Demonstration of a successful predicted mask for a given lesion image.
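+
+### Usage
+The snippet below is an illustrative sketch (not a verbatim excerpt of `predict.py`) of loading the weights saved by `train.py` and predicting a mask for one test image. It assumes the code is run the same way as `predict.py` (from the `recognition/` directory with `s4630051_improved_unet_segmentation` on the import path), that the ISIC data is laid out as expected by `dataset.py`, and that the checkpoint `save/model_save_final.pth` exists.
+
+```python
+import torch
+from torch.utils.data import DataLoader
+
+import modules
+from dataset import ISICDataset, TEST_PATH, TEST_SEG_PATH, data_transforms
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+model = modules.UNetImproved(3, 1)
+model.load_state_dict(torch.load('s4630051_improved_unet_segmentation/save/model_save_final.pth',
+                                 map_location=device))
+model.to(device).eval()
+
+test_set = ISICDataset(TEST_PATH, TEST_SEG_PATH, transform=data_transforms['test'])
+image, mask = next(iter(DataLoader(test_set, batch_size=1)))
+with torch.no_grad():
+    predicted_mask = model(image.to(device))  # shape (1, 1, 256, 256), values in [0, 1]
+```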

\ No newline at end of file
diff --git a/recognition/s4630051_improved_unet_segmentation/dataset.py b/recognition/s4630051_improved_unet_segmentation/dataset.py
new file mode 100644
index 0000000000..e92edd8ec7
--- /dev/null
+++ b/recognition/s4630051_improved_unet_segmentation/dataset.py
@@ -0,0 +1,98 @@
+from PIL import Image
+from torch.utils.data import Dataset
+from torchvision import transforms
+import os
+
+TEST = True
+
+DATA_PATH = os.path.join('s4630051_improved_unet_segmentation', 'test_data' if TEST else 'data')
+
+TRAIN_PATH = os.path.join(DATA_PATH, 'train', 'ISIC-2017_Training_Data')
+TRAIN_SEG_PATH = os.path.join(DATA_PATH, 'train', 'ISIC-2017_Training_Part1_GroundTruth')
+
+VALIDATE_PATH = os.path.join(DATA_PATH, 'validate', 'ISIC-2017_Validation_Data')
+VALIDATE_SEG_PATH = os.path.join(DATA_PATH, 'validate', 'ISIC-2017_Validation_Part1_GroundTruth')
+
+TEST_PATH = os.path.join(DATA_PATH, 'test', 'ISIC-2017_Test_v2_Data')
+TEST_SEG_PATH = os.path.join(DATA_PATH, 'test', 'ISIC-2017_Test_v2_Part1_GroundTruth')
+
+IMAGE_SIZE = 256
+
+class ISICDataset(Dataset):
+    """
+    Dataset class for the ISIC 2017 dataset.
+    """
+    def __init__(self, image_dir, mask_dir, transform=None):
+        """
+        Args:
+            image_dir (string): Path to the image directory.
+            mask_dir (string): Path to the mask directory.
+            transform (callable, optional): Optional transform to be applied on a sample.
+        """
+        # Set image and mask directories
+        self.image_dir = image_dir
+        self.mask_dir = mask_dir
+        # Set transform
+        self.transform = transform
+        # Get image and mask names (sorted so that each image lines up with its mask)
+        self.image_names = sorted(os.listdir(image_dir))
+        self.mask_names = sorted(os.listdir(mask_dir))
+
+        self.length = len(self.image_names)
+
+    def __len__(self):
+        """
+        Returns the length of the dataset.
+        """
+        return self.length
+
+    def __getitem__(self, idx):
+        """
+        Returns a sample (image, mask) of the dataset.
+        """
+        image_path = os.path.join(self.image_dir, self.image_names[idx])
+        mask_path = os.path.join(self.mask_dir, self.mask_names[idx])
+
+        image = Image.open(image_path).convert('RGB')
+        mask = Image.open(mask_path).convert('L')
+
+        if self.transform:
+            image = self.transform(image)
+
+        # The mask is always resized and converted to a tensor so it matches the image size
+        mask = transforms.Resize((IMAGE_SIZE, IMAGE_SIZE))(mask)
+        mask = transforms.ToTensor()(mask)
+
+        return image, mask
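+
+# Illustrative usage only (not executed here): building one split and reading a sample.
+# Assumes the ISIC folders defined above exist on disk.
+#
+#   train_set = ISICDataset(TRAIN_PATH, TRAIN_SEG_PATH, transform=data_transforms['train'])
+#   image, mask = train_set[0]   # image: (3, 256, 256) float tensor, mask: (1, 256, 256)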
+
+# Transformations
+data_transforms = {
+    'train': transforms.Compose([
+        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
+        transforms.RandomHorizontalFlip(),
+        transforms.RandomRotation(15),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.7084, 0.5822, 0.5361],
+                             std=[0.0948, 0.1099, 0.1240])
+    ]),
+    'validate': transforms.Compose([
+        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.7084, 0.5822, 0.5361],
+                             std=[0.0948, 0.1099, 0.1240])
+    ]),
+    'test': transforms.Compose([
+        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.7084, 0.5822, 0.5361],
+                             std=[0.0948, 0.1099, 0.1240])
+    ])
+}
+
+def get_datasets():
+    """
+    Returns the train, validation and test datasets.
+    """
+    train_set = ISICDataset(TRAIN_PATH, TRAIN_SEG_PATH, transform=data_transforms['train'])
+    validate_set = ISICDataset(VALIDATE_PATH, VALIDATE_SEG_PATH, transform=data_transforms['validate'])
+    test_set = ISICDataset(TEST_PATH, TEST_SEG_PATH, transform=data_transforms['test'])
+
+    return train_set, validate_set, test_set
diff --git a/recognition/s4630051_improved_unet_segmentation/images/ISIC_0012092_segmentation.png b/recognition/s4630051_improved_unet_segmentation/images/ISIC_0012092_segmentation.png
new file mode 100644
index 0000000000..f07c799e38
Binary files /dev/null and b/recognition/s4630051_improved_unet_segmentation/images/ISIC_0012092_segmentation.png differ
diff --git a/recognition/s4630051_improved_unet_segmentation/images/proof.png b/recognition/s4630051_improved_unet_segmentation/images/proof.png
new file mode 100644
index 0000000000..f964971d37
Binary files /dev/null and b/recognition/s4630051_improved_unet_segmentation/images/proof.png differ
diff --git a/recognition/s4630051_improved_unet_segmentation/images/test_mask_2.png b/recognition/s4630051_improved_unet_segmentation/images/test_mask_2.png
new file mode 100644
index 0000000000..74881b31b6
Binary files /dev/null and b/recognition/s4630051_improved_unet_segmentation/images/test_mask_2.png differ
diff --git a/recognition/s4630051_improved_unet_segmentation/images/test_model.png b/recognition/s4630051_improved_unet_segmentation/images/test_model.png
new file mode 100644
index 0000000000..f6ddce8724
Binary files /dev/null and b/recognition/s4630051_improved_unet_segmentation/images/test_model.png differ
diff --git a/recognition/s4630051_improved_unet_segmentation/images/train_val_loss.png b/recognition/s4630051_improved_unet_segmentation/images/train_val_loss.png
new file mode 100644
index 0000000000..41fa26112f
Binary files /dev/null and b/recognition/s4630051_improved_unet_segmentation/images/train_val_loss.png differ
diff --git a/recognition/s4630051_improved_unet_segmentation/modules.py b/recognition/s4630051_improved_unet_segmentation/modules.py
new file mode 100644
index 0000000000..dad7fa500a
--- /dev/null
+++ b/recognition/s4630051_improved_unet_segmentation/modules.py
@@ -0,0 +1,174 @@
+import torch
+import torch.nn as nn
+
+class ContextBlock(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size=3, pdrop=0.3):
+        super(ContextBlock, self).__init__()
+
+        # Context block: two 3x3 convolutions with instance normalisation and leaky ReLU,
+        # with dropout in between. The residual sum is applied in UNetImproved.forward.
+        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=1)
+        self.norm1 = nn.InstanceNorm2d(out_channels)
+        self.relu = nn.LeakyReLU(negative_slope=0.01, inplace=True)
+        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=kernel_size, padding=1)
+        self.norm2 = nn.InstanceNorm2d(out_channels)
+
+        # Dropout layer
+        self.dropout = nn.Dropout2d(p=pdrop)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.norm1(x)
+        x = self.relu(x)
+
+        x = self.dropout(x)
+
+        x = self.conv2(x)
+        x = self.norm2(x)
+        x = self.relu(x)
+
+        return x
+
+class UpsamplingBlock(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size=3):
+        super(UpsamplingBlock, self).__init__()
+
+        # Upsampling layer
+        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
+
+        # Convolutional layer
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=1)
+
+    def forward(self, x):
+        x = self.upsample(x)
+        x = self.conv(x)
+
+        return x
+
+class LocalisationBlock(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size=3):
+        super(LocalisationBlock, self).__init__()
+
+        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=1)
+        self.relu = nn.LeakyReLU(negative_slope=0.01, inplace=True)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=1)
+
+    def forward(self, x):
+        x = self.relu(x)
+        x = self.conv1(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.bn(x)
+
+        return x
+
+class UNetImproved(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size=3):
+        super(UNetImproved, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+
+        self.relu = nn.LeakyReLU(negative_slope=0.01, inplace=True)
+
+        # Encoder
+        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=kernel_size, padding=1)
+        self.context1 = ContextBlock(16, 16)
+
+        self.conv2 = nn.Conv2d(16, 32, kernel_size=kernel_size, padding=1, stride=2)
+        self.context2 = ContextBlock(32, 32)
+
+        self.conv3 = nn.Conv2d(32, 64, kernel_size=kernel_size, padding=1, stride=2)
+        self.context3 = ContextBlock(64, 64)
+
+        self.conv4 = nn.Conv2d(64, 128, kernel_size=kernel_size, padding=1, stride=2)
+        self.context4 = ContextBlock(128, 128)
+
+        self.conv5 = nn.Conv2d(128, 256, kernel_size=kernel_size, padding=1, stride=2)
+        self.context5 = ContextBlock(256, 256)
+
+        self.up1 = UpsamplingBlock(256, 128)
+        self.local1 = LocalisationBlock(256, 128)
+
+        self.up2 = UpsamplingBlock(128, 64)
+        self.local2 = LocalisationBlock(128, 64)
+        self.seg1 = nn.Conv2d(64, out_channels, stride=1, kernel_size=kernel_size, padding=1)
+
+        self.up3 = UpsamplingBlock(64, 32)
+        self.local3 = LocalisationBlock(64, 32)
+        self.seg2 = nn.Conv2d(32, out_channels, stride=1, kernel_size=kernel_size, padding=1)
+
+        self.up4 = UpsamplingBlock(32, 16)
+        self.local4 = LocalisationBlock(32, 16)
+
+        self.conv6 = nn.Conv2d(16, 32, kernel_size=kernel_size, padding=1)
+
+        self.seg3 = nn.Conv2d(32, out_channels, stride=1, kernel_size=kernel_size, padding=1)
+
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        # Encoder
+        x = self.conv1(x)
+        x = self.relu(x)
+        y = self.context1(x)
+        skip_1 = x + y
+
+        x = self.conv2(skip_1)
+        x = self.relu(x)
+        y = self.context2(x)
+        skip_2 = x + y
+
+        x = self.conv3(skip_2)
+        x = self.relu(x)
+        y = self.context3(x)
+        skip_3 = x + y
+
+        x = self.conv4(skip_3)
+        x = self.relu(x)
+        y = self.context4(x)
+        skip_4 = x + y
+
+        x = self.conv5(skip_4)
+        x = self.relu(x)
+        y = self.context5(x)
+        x = x + y
+
+        # Decoder
+        x = self.up1(x)
+        x = torch.cat([x, skip_4], dim=1)
+        x = self.local1(x)
+
+        x = self.up2(x)
+        x = torch.cat([x, skip_3], dim=1)
+        y = self.local2(x)
+
+        skip_seg_1 = self.seg1(y)
+        skip_seg_1 = nn.Upsample(scale_factor=2, mode='nearest')(skip_seg_1)
+
+        x = self.up3(y)
+        x = torch.cat([x, skip_2], dim=1)
+        y = self.local3(x)
+
+        skip_seg_2 = self.seg2(y)
+        skip_seg_2 = skip_seg_2 + skip_seg_1
+        skip_seg_2 = nn.Upsample(scale_factor=2, mode='nearest')(skip_seg_2)
+
+        x = self.up4(y)
+        x = torch.cat([x, skip_1], dim=1)
+        x = self.local4(x)
+
+        x = self.conv6(x)
+        x = self.relu(x)
+        x = self.seg3(x)
+
+        x = x + skip_seg_2
+
+        x = self.sigmoid(x)
+
+        return x
\ No newline at end of file
diff --git
a/recognition/s4630051_improved_unet_segmentation/predict.py b/recognition/s4630051_improved_unet_segmentation/predict.py
new file mode 100644
index 0000000000..cd42fb8870
--- /dev/null
+++ b/recognition/s4630051_improved_unet_segmentation/predict.py
@@ -0,0 +1,43 @@
+import os
+import torch
+from dataset import *
+import modules
+from torch.utils.data import DataLoader
+from torchvision import utils
+import matplotlib.pyplot as plt
+
+TEST_PATH = os.path.join(DATA_PATH, 'test', 'ISIC-2017_Test_v2_Data')
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+# Load the trained model (the class defined in modules.py is UNetImproved)
+model = modules.UNetImproved(3, 1)
+model.load_state_dict(torch.load('s4630051_improved_unet_segmentation/save/model_save_final.pth',
+                                 map_location=device))
+
+test_dataset = ISICDataset(TEST_PATH, TEST_SEG_PATH, transform=data_transforms['test'])
+test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)
+
+model.eval()
+model.to(device)
+
+# The dataset yields (image, mask) pairs; the mask is not needed for prediction
+for image, _ in test_loader:
+
+    image = image.to(device)
+    with torch.no_grad():
+        output = model(image)
+
+    out_grid_img = utils.make_grid(output.cpu(), nrow=4)
+    image_grid = utils.make_grid(image.cpu(), nrow=4)
+
+    fig = plt.figure()
+    ax1 = fig.add_subplot(1, 2, 1)
+    ax1.imshow(out_grid_img.permute(1, 2, 0), cmap='gray')
+
+    ax2 = fig.add_subplot(1, 2, 2)
+    ax2.imshow(image_grid.permute(1, 2, 0))
+
+    plt.savefig('modelpredictions.png')
+    plt.show()
+    break
\ No newline at end of file
+ """ + print('Begin training model...') + + # Set model to training mode + start = time.time() + + train_lossess = [] + validate_losses = [] + + # Iterate over epochs + for epoch in range(num_epochs): + train_loss_sum = 0.0 + + for image, mask in train_loader: + model.train() + + image = image.to(device) + mask = mask.to(device).float() + + # Zero the parameter gradients + optimizer.zero_grad() + + output = model(image) + + loss = criterion(output, mask) + + loss.backward() + optimizer.step() + + train_loss_sum += loss.item() + + avg_train_loss = train_loss_sum / len(train_loader) + train_lossess.append(avg_train_loss) + + print('Epoch: {}/{} | Training Loss: {:.4f}'.format(epoch + 1, num_epochs, avg_train_loss)) + + # Validate model + validate_loss_sum = 0.0 + accuracy_sum = 0.0 + + for image, mask in validate_loader: + model.eval() + + image = image.to(device) + mask = mask.to(device) + + output = model(image) + + with torch.no_grad(): + loss = criterion(output, mask) + + accuracy_sum += accuracy(output, mask) + + validate_loss_sum += loss.item() + + avg_validate_loss = validate_loss_sum / len(validate_loader) + validate_losses.append(avg_validate_loss) + + print('Epoch: {}/{} | Validation Loss: {:.4f}'.format(epoch + 1, num_epochs, avg_validate_loss)) + print('Epoch: {}/{} | Validation Accuracy: {:.4f}'.format(epoch + 1, num_epochs, accuracy_sum / len(validate_loader)), end='\n\n') + + scheduler.step(avg_validate_loss) + torch.save(model.state_dict(), f's4630051_improved_unet_segmentation/save/model_save_{epoch}.pth') + + end = time.time() + print('Finished training model. Time taken: {:.4f} seconds'.format(end - start)) + + torch.save(model.state_dict(), 's4630051_improved_unet_segmentation/save/model_save_final.pth') + + return model, train_lossess, validate_losses + +def accuracy(model, mask): + with torch.no_grad(): + model = (model > 0.5).float() #if a pixel has value > 0.5, we accept it as a skin lesion + correct = (model == mask).sum() + pixels = torch.numel(model) + accuracy = correct / pixels + 1e-8 + + return accuracy + +def test_model(model): + """ + Test the model. + @param model: The model to test. + """ + pass + +model = modules.UNetImproved(3, 1) + +model.to(device) + +criterion = DiceLoss() +optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=DECAY) +scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: LEARNING_RATE * (0.975 ** epoch)) +train_model(model, criterion, optimizer, scheduler) \ No newline at end of file