|
| 1 | +import shutil |
| 2 | +import numpy as np |
| 3 | +import matplotlib.pyplot as plt |
| 4 | +import os |
| 5 | +import nibabel as nib |
| 6 | +import pickle |
| 7 | +import random |
| 8 | +import pandas as pd |
| 9 | +from tqdm import tqdm |
| 10 | +# import cv2 |
| 11 | +import SimpleITK as sitk |
| 12 | +from scipy.ndimage.interpolation import zoom |
| 13 | +import json |
| 14 | +from PIL import Image |
| 15 | + |
# Root folder holding both the raw EndoVis18 data and every generated output.
base_dir = "/mnt/weka/wekafs/rad-megtron/cchen"

# Annotation colors (RGB) in class order; the position in this tuple, counted
# from 1, is the integer label written into the converted masks.
# NOTE(review): presumably these follow the EndoVis 2018 class palette --
# confirm against the dataset's label definition file.
_LABEL_COLORS = (
    (0, 255, 0),
    (0, 255, 255),
    (125, 255, 12),
    (255, 55, 0),
    (24, 55, 125),
    (187, 155, 25),
    (0, 255, 125),
    (255, 255, 125),
    (123, 15, 175),
    (124, 155, 5),
    (12, 255, 141),
)

# Define RGB values and their corresponding mask indices (1..11).
color_mappings = dict(zip(_LABEL_COLORS, range(1, len(_LABEL_COLORS) + 1)))
| 32 | + |
def organize_data():
    """Convert the raw EndoVis18 folders into a flat imagesTr/labelsTr dataset.

    Every ``seq*`` folder under ``<base_dir>/endovis18/Train`` and
    ``<base_dir>/endovis18/Test`` is scanned. For each ``.png`` found in a
    sequence's ``labels`` folder:

    * the RGB annotation is translated to a single-channel ``uint8`` index
      mask using ``color_mappings`` (pixels whose color is not in the mapping
      stay 0) and saved to ``<base_dir>/Dataset907_endovis18/labelsTr``;
    * the file with the same name in ``left_frames`` is copied to
      ``imagesTr`` with a ``_0000`` suffix.

    Output files are named ``endovis_<case><frame>``. ``<case>`` is ``00``
    followed by the two-digit sequence number (taken from the text after the
    last ``_`` in the folder name); Test sequences get 20 added so they do not
    collide with Train sequences. ``<frame>`` is the label file stem with its
    first five characters removed.

    Raises:
        OSError: if an input folder or a matching ``left_frames`` image is
            missing.
        ValueError: if a sequence folder name does not end in an integer.
    """
    save_pth = os.path.join(base_dir, 'Dataset907_endovis18')
    images_out = os.path.join(save_pth, 'imagesTr')
    labels_out = os.path.join(save_pth, 'labelsTr')
    os.makedirs(images_out, exist_ok=True)
    os.makedirs(labels_out, exist_ok=True)

    # (source folder, offset added to the sequence number). An explicit table
    # guarantees every folder has a well-defined case-id rule.
    splits = [
        (os.path.join(base_dir, 'endovis18', 'Train'), 0),
        (os.path.join(base_dir, 'endovis18', 'Test'), 20),
    ]

    for data_pth, id_offset in splits:
        seq_dirs = sorted(d for d in os.listdir(data_pth) if d.startswith('seq'))

        for data_fd in seq_dirs:
            print(data_fd)
            seq_number = int(data_fd.split('_')[-1]) + id_offset
            patient_ID = '00' + '{:02d}'.format(seq_number)

            label_dir = os.path.join(data_pth, data_fd, 'labels')
            frame_dir = os.path.join(data_pth, data_fd, 'left_frames')
            label_names = sorted(
                name for name in os.listdir(label_dir) if name.endswith('.png')
            )

            for filename in label_names:
                # Drop the extension, then the five-character name prefix.
                file_ID = filename.split('.png')[0][5:]
                out_stem = 'endovis_' + patient_ID + file_ID

                # Close the file promptly and force three channels so that
                # palette / RGBA / grayscale PNGs are handled uniformly.
                with Image.open(os.path.join(label_dir, filename)) as label_obj:
                    label_arr = np.array(label_obj.convert('RGB'))

                mask = np.zeros(label_arr.shape[:2], dtype=np.uint8)
                for color, index in color_mappings.items():
                    # A pixel matches only when all three channels agree.
                    mask[np.all(label_arr == color, axis=-1)] = index

                Image.fromarray(mask).save(os.path.join(labels_out, out_stem + '.png'))

                shutil.copy(
                    os.path.join(frame_dir, filename),
                    os.path.join(images_out, out_stem + '_0000.png'),
                )
| 76 | + |
| 77 | + |
def get_all_5slice():
    """Pickle a 5-frame sliding window around every frame of every case.

    Frames are read from ``<base_dir>/Dataset907_endovis18/imagesTr`` and
    their masks from ``labelsTr`` (same name without the ``_0000`` suffix).
    Files are grouped into cases by the first four characters that follow the
    first ``_`` in the file name, and processed in sorted file-name order.

    Per case:

    1. Each frame/mask pair that is not already 512x512 is rescaled to
       512x512 (cubic spline for the image, nearest neighbour for the mask).
    2. Frames are stacked along a new last axis, and the first and last frame
       are each replicated twice so that every real frame has two neighbours
       on both sides.
    3. For frame ``i`` the window ``[i-2, i+2]`` is rotated by 90 degrees in
       the image plane, mirrored along axis 1, and pickled to
       ``<base_dir>/endovis18/2D_all_5slice/<case>/images/2Dimage_<i:04d>.pkl``
       (mask windows go to ``masks/2Dmask_<i:04d>.pkl``).

    NOTE(review): the image zoom factors assume a 3-D ``(H, W, C)`` array --
    confirm no single-channel frames are present.

    Raises:
        OSError: if an input folder or a mask file is missing.
    """
    save_pth = os.path.join(base_dir, 'endovis18', '2D_all_5slice')
    data_pth_all = [os.path.join(base_dir, 'Dataset907_endovis18')]
    out_h, out_w = 512, 512
    half_window = 2

    for data_pth in data_pth_all:
        image_dir = os.path.join(data_pth, 'imagesTr')
        label_dir = os.path.join(data_pth, 'labelsTr')

        # List the folder once and bucket the files per case instead of
        # re-listing and re-filtering the whole folder for every case.
        files_by_case = {}
        for filename in os.listdir(image_dir):
            if filename.endswith('.png'):
                case_key = filename.split('_')[1][0:4]
                files_by_case.setdefault(case_key, []).append(filename)

        for case_id in sorted(files_by_case):
            print(case_id)

            image_out = os.path.join(save_pth, case_id, 'images')
            mask_out = os.path.join(save_pth, case_id, 'masks')
            # exist_ok keeps a re-run working when a previous run left the
            # case folder only partially created.
            os.makedirs(image_out, exist_ok=True)
            os.makedirs(mask_out, exist_ok=True)

            img_arr, mask_arr = [], []
            for filename in sorted(files_by_case[case_id]):
                mask_name = filename.replace('_0000.png', '.png')
                with Image.open(os.path.join(image_dir, filename)) as image_obj:
                    image_slice = np.array(image_obj)
                with Image.open(os.path.join(label_dir, mask_name)) as mask_obj:
                    mask_slice = np.array(mask_obj)

                h, w = image_slice.shape[0], image_slice.shape[1]
                if h != out_h or w != out_w:
                    image_slice = zoom(image_slice, (out_h / h, out_w / w, 1.0), order=3)
                    # order=0 keeps mask values as valid class indices.
                    mask_slice = zoom(mask_slice, (out_h / h, out_w / w), order=0)

                img_arr.append(image_slice)
                mask_arr.append(mask_slice)

            # Move the frame axis last: (N, H, W, C) -> (H, W, C, N) and
            # (N, H, W) -> (H, W, N).
            img_arr = np.transpose(np.array(img_arr), (1, 2, 3, 0))
            mask_arr = np.transpose(np.array(mask_arr), (1, 2, 0))

            # Replicate the first and last frame so border frames still get a
            # full window.
            frame_pad = (half_window, half_window)
            img_arr = np.pad(img_arr, ((0, 0), (0, 0), (0, 0), frame_pad), mode='edge')
            mask_arr = np.pad(mask_arr, ((0, 0), (0, 0), frame_pad), mode='edge')

            window = 2 * half_window + 1
            frame_count = img_arr.shape[-1] - 2 * half_window
            for frame_indx in range(frame_count):
                slice_arr = img_arr[:, :, :, frame_indx:frame_indx + window]
                slice_arr = np.flip(np.rot90(slice_arr, k=1, axes=(0, 1)), axis=1)

                mask_arr_2D = mask_arr[:, :, frame_indx:frame_indx + window]
                mask_arr_2D = np.flip(np.rot90(mask_arr_2D, k=1, axes=(0, 1)), axis=1)

                image_file = os.path.join(image_out, '2Dimage_' + '{:04d}'.format(frame_indx) + '.pkl')
                with open(image_file, 'wb') as file:
                    pickle.dump(slice_arr, file)

                mask_file = os.path.join(mask_out, '2Dmask_' + '{:04d}'.format(frame_indx) + '.pkl')
                with open(mask_file, 'wb') as file:
                    pickle.dump(mask_arr_2D, file)
| 142 | + |
| 143 | + |
def _write_split_csv(root, case_ids, csv_path, shuffle):
    """Write a CSV of image/mask pickle paths (relative to *root*) for *case_ids*.

    Each row has an ``image_pth`` column (``<case>/images/<file>``) and a
    ``mask_pth`` column derived from it by swapping ``/images/2Dimage_`` for
    ``/masks/2Dmask_``. Files are listed in sorted order per case; when
    *shuffle* is true the combined list is shuffled once before writing.
    """
    rel_paths = []
    for case_id in case_ids:
        # Sort first so that a seeded RNG yields a reproducible order
        # regardless of the order the filesystem returns entries in.
        names = sorted(os.listdir(os.path.join(root, case_id, 'images')))
        rel_paths.extend(case_id + '/images/' + name for name in names)

    if shuffle:
        random.shuffle(rel_paths)

    df = pd.DataFrame({
        'image_pth': rel_paths,
        'mask_pth': [
            pth.replace('/images/2Dimage_', '/masks/2Dmask_') for pth in rel_paths
        ],
    })
    df.to_csv(csv_path, index=False)


def get_csv():
    """Write training/validation/test CSV indexes for the 5-slice dataset.

    Case folders are the entries of ``<base_dir>/endovis18/2D_all_5slice``
    whose name starts with ``00`` and contains no ``.``. Cases ``0021`` to
    ``0024`` are held out; all remaining cases form the training split.

    Three files are written into the same folder:

    * ``training.csv``   -- all training samples, shuffled;
    * ``validation.csv`` -- all samples of the held-out cases, shuffled;
    * ``test.csv``       -- all samples of the held-out cases, in sorted order.

    NOTE(review): validation and test are built from the same four cases --
    confirm this overlap is intended.

    Raises:
        OSError: if the dataset folder or a held-out case folder is missing.
    """
    save_pth = os.path.join(base_dir, 'endovis18', '2D_all_5slice')

    training_csv = os.path.join(save_pth, 'training.csv')
    validation_csv = os.path.join(save_pth, 'validation.csv')
    test_csv = os.path.join(save_pth, 'test.csv')

    test_fd_list = ['0021', '0022', '0023', '0024']
    held_out = set(test_fd_list)

    data_fd_list = [
        data_fd for data_fd in os.listdir(save_pth)
        if data_fd.startswith('00') and '.' not in data_fd
    ]
    training_fd_list = sorted(
        data_fd for data_fd in data_fd_list if data_fd not in held_out
    )
    # Validation uses every held-out case; sample order is randomised when
    # the paths themselves are shuffled, so the case order is irrelevant.
    validation_fd_list = list(test_fd_list)

    _write_split_csv(save_pth, training_fd_list, training_csv, shuffle=True)
    _write_split_csv(save_pth, validation_fd_list, validation_csv, shuffle=True)
    _write_split_csv(save_pth, test_fd_list, test_csv, shuffle=False)
| 213 | + |
| 214 | + |
if __name__ == "__main__":
    # Run the pipeline end to end: raw data -> flat dataset -> 5-frame
    # pickles -> CSV indexes. Each stage reads what the previous one wrote.
    for stage in (organize_data, get_all_5slice, get_csv):
        stage()
0 commit comments