From ae89c6a9cbb6def226ab787a942b60a2fb2a8fd4 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Tue, 9 Apr 2024 11:22:43 +0100 Subject: [PATCH 01/11] get each direction working separately --- run.sh | 1 + voxaboxen/data/data.py | 143 +++++++------ voxaboxen/evaluation/evaluation.py | 318 ++++++++++++++++------------- voxaboxen/model/model.py | 71 ++++--- voxaboxen/training/params.py | 37 ++-- voxaboxen/training/train.py | 238 ++++++++++++--------- voxaboxen/training/train_model.py | 20 +- 7 files changed, 461 insertions(+), 367 deletions(-) create mode 100644 run.sh diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..fc135c2 --- /dev/null +++ b/run.sh @@ -0,0 +1 @@ +python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=demo --lr=.00005 --batch-size=4 --n-epochs 4 diff --git a/voxaboxen/data/data.py b/voxaboxen/data/data.py index 620c614..924ce86 100644 --- a/voxaboxen/data/data.py +++ b/voxaboxen/data/data.py @@ -1,9 +1,7 @@ -import os import math import numpy as np import pandas as pd import librosa -import warnings from numpy.random import default_rng from intervaltree import IntervalTree @@ -16,16 +14,16 @@ def normalize_sig_np(sig, eps=1e-8): sig = sig / (np.max(np.abs(sig))+eps) return sig - + def crop_and_pad(wav, sr, dur_sec): # crops and pads waveform to be the expected number of samples; used after resampling to ensure proper size target_dur_samples = int(sr * dur_sec) wav = wav[..., :target_dur_samples] - + pad = target_dur_samples - wav.size(-1) if pad > 0: wav = F.pad(wav, (0,pad)) #padding starts from last dims - + return wav class DetectionDataset(Dataset): @@ -47,7 +45,7 @@ def __init__(self, info_df, train, args, random_seed_shift = 0): if self.amp_aug: self.amp_aug_low_r = args.amp_aug_low_r self.amp_aug_high_r = args.amp_aug_high_r - assert (self.amp_aug_low_r >= 0) #and (self.amp_aug_high_r <= 1) and + assert (self.amp_aug_low_r >= 0) #and (self.amp_aug_high_r <= 1) and assert (self.amp_aug_low_r <= self.amp_aug_high_r) self.scale_factor = args.scale_factor @@ -59,14 +57,14 @@ def __init__(self, info_df, train, args, random_seed_shift = 0): self.mono = False else: self.mono = True - + if self.train: self.omit_empty_clip_prob = args.omit_empty_clip_prob self.clip_start_offset = self.rng.integers(0, np.floor(self.clip_hop*self.sr)) / self.sr else: self.omit_empty_clip_prob = 0 self.clip_start_offset = 0 - + # make metadata self.make_metadata() @@ -86,15 +84,15 @@ def process_selection_table(self, selection_table_fp): start = row['Begin Time (s)'] end = row['End Time (s)'] label = row['Annotation'] - + if end<=start: continue - + if label in self.label_mapping: label = self.label_mapping[label] else: continue - + if label == self.unknown_label: label_idx = -1 else: @@ -110,7 +108,7 @@ def make_metadata(self): for ii, row in self.info_df.iterrows(): fn = row['fn'] audio_fp = row['audio_fp'] - + duration = librosa.get_duration(path=audio_fp) selection_table_fp = row['selection_table_fp'] @@ -141,10 +139,10 @@ def get_pos_intervals(self, fn, start, end): intervals = [(max(iv.begin, start)-start, min(iv.end, end)-start, iv.data) for iv in intervals] return intervals - + def get_class_proportions(self): counts = np.zeros((self.n_classes,)) - + for k in self.selection_table_dict: st = self.selection_table_dict[k] for interval in st: @@ -153,91 +151,101 @@ def get_class_proportions(self): continue else: counts[annot] += 1 - + total_count = np.sum(counts) proportions = counts / total_count - + return proportions - def 
get_annotation(self, pos_intervals, audio): - raw_seq_len = audio.shape[-1] seq_len = int(math.ceil(raw_seq_len / self.scale_factor_raw_to_prediction)) - regression_anno = np.zeros((seq_len,)) - class_anno = np.zeros((seq_len, self.n_classes)) - anno_sr = int(self.sr // self.scale_factor_raw_to_prediction) - + + regression_annos = np.zeros((seq_len,)) + class_annos = np.zeros((seq_len, self.n_classes)) anchor_annos = [np.zeros(seq_len,)] + rev_regression_annos = np.zeros((seq_len,)) + rev_class_annos = np.zeros((seq_len, self.n_classes)) + rev_anchor_annos = [np.zeros(seq_len,)] for iv in pos_intervals: start, end, class_idx = iv dur = end-start - + dur_samples = np.ceil(dur * anno_sr) + start_idx = int(math.floor(start*anno_sr)) start_idx = max(min(start_idx, seq_len-1), 0) - dur_samples = np.ceil(dur * anno_sr) - + end_idx = int(math.floor(end*anno_sr)) + end_idx = max(min(end_idx, seq_len-1), 0) + anchor_anno = get_anchor_anno(start_idx, dur_samples, seq_len) anchor_annos.append(anchor_anno) - regression_anno[start_idx] = dur + regression_annos[start_idx] = dur + rev_anchor_anno = get_anchor_anno(end_idx, dur_samples, seq_len) + rev_anchor_annos.append(rev_anchor_anno) + rev_regression_annos[end_idx] = dur if class_idx != -1: - class_anno[start_idx, class_idx] = 1. + class_annos[start_idx, class_idx] = 1. + rev_class_annos[end_idx, class_idx] = 1. else: - class_anno[start_idx, :] = 1./self.n_classes # if unknown, enforce uncertainty - + class_annos[start_idx, :] = 1./self.n_classes # if unknown, enforce uncertainty + rev_class_annos[end_idx, :] = 1./self.n_classes # if unknown, enforce uncertainty + anchor_annos = np.stack(anchor_annos) anchor_annos = np.amax(anchor_annos, axis = 0) - - return anchor_annos, regression_anno, class_anno # shapes [time_steps, ], [time_steps, ], [time_steps, n_classes] + rev_anchor_annos = np.stack(rev_anchor_annos) + rev_anchor_annos = np.amax(rev_anchor_annos, axis = 0) + # shapes [time_steps, ], [time_steps, ], [time_steps, n_classes] + return anchor_annos, regression_annos, class_annos, rev_anchor_annos, rev_regression_annos, rev_class_annos def __getitem__(self, index): fn, audio_fp, start, end = self.metadata[index] - - audio, file_sr = librosa.load(audio_fp, sr=None, offset=start, duration=self.clip_duration, mono=self.mono) + + audio, file_sr = librosa.load(audio_fp, sr=None, offset=start, duration=self.clip_duration, mono=self.mono) audio = torch.from_numpy(audio) - + audio = audio-torch.mean(audio, -1, keepdim=True) if self.amp_aug and self.train: audio = self.augment_amplitude(audio) if file_sr != self.sr: - audio = torchaudio.functional.resample(audio, file_sr, self.sr) - + audio = torchaudio.functional.resample(audio, file_sr, self.sr) + audio = crop_and_pad(audio, self.sr, self.clip_duration) - + pos_intervals = self.get_pos_intervals(fn, start, end) - anchor_anno, regression_anno, class_anno = self.get_annotation(pos_intervals, audio) + anchor_anno, regression_anno, class_anno, rev_anchor_anno, rev_regression_anno, rev_class_anno = self.get_annotation(pos_intervals, audio) - return audio, torch.from_numpy(anchor_anno), torch.from_numpy(regression_anno), torch.from_numpy(class_anno) + return audio, torch.from_numpy(anchor_anno), torch.from_numpy(regression_anno), torch.from_numpy(class_anno), torch.from_numpy(rev_anchor_anno), torch.from_numpy(rev_regression_anno), torch.from_numpy(rev_class_anno) def __len__(self): return len(self.metadata) - - + + def get_train_dataloader(args, random_seed_shift = 0): train_info_fp = args.train_info_fp 
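+    # train info csv is expected to have one row per audio file, with columns fn, audio_fp, selection_table_fp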
train_info_df = pd.read_csv(train_info_fp) - + train_dataset = DetectionDataset(train_info_df, True, args, random_seed_shift = random_seed_shift) - + if args.mixup: effective_batch_size = args.batch_size*2 # double batch size because half will be discarded before being passed to model else: effective_batch_size = args.batch_size - - + + train_dataloader = DataLoader(train_dataset, - batch_size=effective_batch_size, + batch_size=effective_batch_size, shuffle=True, - num_workers=args.num_workers, - pin_memory=True, + #num_workers=args.num_workers, + num_workers=0, + pin_memory=True, drop_last = True) - + return train_dataloader - + class SingleClipDataset(Dataset): def __init__(self, audio_fp, clip_hop, args, annot_fp = None): # waveform (samples,) @@ -253,26 +261,26 @@ def __init__(self, audio_fp, clip_hop, args, annot_fp = None): self.mono = False else: self.mono = True - + def __len__(self): return self.num_clips def __getitem__(self, idx): """ Map int idx to dict of torch tensors """ start = idx * self.clip_hop - + audio, file_sr = librosa.load(self.audio_fp, sr=None, offset=start, duration=self.clip_duration, mono=self.mono) audio = torch.from_numpy(audio) - - + + audio = audio-torch.mean(audio, -1, keepdim=True) if file_sr != self.sr: - audio = torchaudio.functional.resample(audio, file_sr, self.sr) - + audio = torchaudio.functional.resample(audio, file_sr, self.sr) + audio = crop_and_pad(audio, self.sr, self.clip_duration) - + return audio - + def get_single_clip_data(audio_fp, clip_hop, args, annot_fp = None): return DataLoader( SingleClipDataset(audio_fp, clip_hop, args, annot_fp = annot_fp), @@ -284,33 +292,33 @@ def get_single_clip_data(audio_fp, clip_hop, args, annot_fp = None): ) def get_val_dataloader(args): - val_info_fp = args.val_info_fp + val_info_fp = args.val_info_fp val_info_df = pd.read_csv(val_info_fp) - + val_dataloaders = {} - + for i in range(len(val_info_df)): fn = val_info_df.iloc[i]['fn'] audio_fp = val_info_df.iloc[i]['audio_fp'] annot_fp = val_info_df.iloc[i]['selection_table_fp'] val_dataloaders[fn] = get_single_clip_data(audio_fp, args.clip_duration/2, args, annot_fp = annot_fp) - + return val_dataloaders def get_test_dataloader(args): test_info_fp = args.test_info_fp test_info_df = pd.read_csv(test_info_fp) - + test_dataloaders = {} - + for i in range(len(test_info_df)): fn = test_info_df.iloc[i]['fn'] audio_fp = test_info_df.iloc[i]['audio_fp'] annot_fp = test_info_df.iloc[i]['selection_table_fp'] test_dataloaders[fn] = get_single_clip_data(audio_fp, args.clip_duration/2, args, annot_fp = annot_fp) - + return test_dataloaders - + def get_anchor_anno(start_idx, dur_samples, seq_len): # start times plus gaussian blur # std setting follows CornerNet, where adaptive standard deviation is set to 1/3 image radius @@ -319,4 +327,5 @@ def get_anchor_anno(start_idx, dur_samples, seq_len): x = x / (2 * std**2) x = np.exp(-x) return x - \ No newline at end of file + + diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index 4896ca1..6b3691f 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -15,7 +15,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu" -def pred2bbox(detection_peaks, detection_probs, durations, class_idxs, class_probs, pred_sr): +def pred2bbox(detection_peaks, detection_probs, durations, class_idxs, class_probs, pred_sr, is_rev): ''' detection_peaks, detection_probs, durations, class_idxs, class_probs : shape=(num_frames,) @@ -29,23 +29,27 @@ def 
pred2bbox(detection_peaks, detection_probs, durations, class_idxs, class_pro detection_probs_sub = [] class_idxs_sub = [] class_probs_sub = [] - + for i in range(len(detection_peaks)): duration = durations[i] - start = detection_peaks[i] - if duration <= 0: continue - - bbox = [start, start+duration] + + if is_rev: + end = detection_peaks[i] + bbox = [end-duration, end] + else: + start = detection_peaks[i] + bbox = [start, start+duration] + bboxes.append(bbox) - + detection_probs_sub.append(detection_probs[i]) class_idxs_sub.append(class_idxs[i]) class_probs_sub.append(class_probs[i]) - + return np.array(bboxes), np.array(detection_probs_sub), np.array(class_idxs_sub), np.array(class_probs_sub) - + def bbox2raven(bboxes, class_idxs, label_set, detection_probs, class_probs, unknown_label): ''' output bounding boxes to a selection table @@ -55,18 +59,18 @@ def bbox2raven(bboxes, class_idxs, label_set, detection_probs, class_probs, unkn bboxes: numpy array shape=(num_bboxes, 2) - + class_idxs: numpy array shape=(num_bboxes,) label_set: list - + detection_probs: numpy array shape =(num_bboxes,) - + class_probs: numpy array shape = (num_bboxes,) - + unknown_label: str ''' @@ -74,14 +78,14 @@ def bbox2raven(bboxes, class_idxs, label_set, detection_probs, class_probs, unkn return [['Begin Time (s)', 'End Time (s)', 'Annotation', 'Detection Prob', 'Class Prob']] columns = ['Begin Time (s)', 'End Time (s)', 'Annotation', 'Detection Prob', 'Class Prob'] - - + + def label_idx_to_label(i): if i==-1: return unknown_label else: return label_set[i] - + out_data = [[bbox[0], bbox[1], label_idx_to_label(int(c)), dp, cp] for bbox, c, dp, cp in zip(bboxes, class_idxs, detection_probs, class_probs)] out_data = sorted(out_data, key=lambda x: x[:2]) @@ -110,74 +114,88 @@ def generate_predictions(model, single_clip_dataloader, args, verbose = True): model = model.to(device) model.eval() - + all_detections = [] all_regressions = [] - all_classifications = [] - + all_classifs = [] + all_rev_detections = [] + all_rev_regressions = [] + all_rev_classifs = [] + if verbose: iterator = tqdm.tqdm(enumerate(single_clip_dataloader), total=len(single_clip_dataloader)) else: iterator = enumerate(single_clip_dataloader) - + with torch.no_grad(): for i, X in iterator: X = X.to(device = device, dtype = torch.float) X, _, _, _ = rms_and_mixup(X, None, None, None, False, args) - - detection, regression, classification = model(X) - classification = torch.nn.functional.softmax(classification, dim=-1) - + + detection, regression, classif, rev_detection, rev_regression, rev_classif = model(X) + classif = torch.nn.functional.softmax(classif, dim=-1) + rev_classif = torch.nn.functional.softmax(rev_classif, dim=-1) + all_detections.append(detection) all_regressions.append(regression) - all_classifications.append(classification) - + all_classifs.append(classif) + all_rev_detections.append(rev_detection) + all_rev_regressions.append(rev_regression) + all_rev_classifs.append(rev_classif) + all_detections = torch.cat(all_detections) all_regressions = torch.cat(all_regressions) - all_classifications = torch.cat(all_classifications) + all_classifs = torch.cat(all_classifs) + all_rev_detections = torch.cat(all_rev_detections) + all_rev_regressions = torch.cat(all_rev_regressions) + all_rev_classifs = torch.cat(all_rev_classifs) - # we use half overlapping windows, need to throw away boundary predictions - # See get_val_dataloader and get_test_dataloader in data.py - - ######## Todo: Need better checking that preds are the correct dur + + 
######## Todo: Need better checking that preds are the correct dur assert all_detections.size(dim=1) % 2 == 0 first_quarter_window_dur_samples=all_detections.size(dim=1)//4 last_quarter_window_dur_samples=(all_detections.size(dim=1)//2)-first_quarter_window_dur_samples - - # assemble detections - beginning_bit = all_detections[0,:first_quarter_window_dur_samples] - end_bit = all_detections[-1,-last_quarter_window_dur_samples:] - detections_clipped = all_detections[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples] - all_detections = torch.reshape(detections_clipped, (-1,)) - all_detections = torch.cat([beginning_bit, all_detections, end_bit]) - - # assemble regressions - beginning_bit = all_regressions[0,:first_quarter_window_dur_samples] - end_bit = all_regressions[-1,-last_quarter_window_dur_samples:] - regressions_clipped = all_regressions[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples] - all_regressions = torch.reshape(regressions_clipped, (-1,)) - all_regressions = torch.cat([beginning_bit, all_regressions, end_bit]) - - # assemble classifications - beginning_bit = all_classifications[0,:first_quarter_window_dur_samples, :] - end_bit = all_classifications[-1,-last_quarter_window_dur_samples:, :] - classifications_clipped = all_classifications[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples,:] - all_classifications = torch.reshape(classifications_clipped, (-1, classifications_clipped.size(-1))) - all_classifications = torch.cat([beginning_bit, all_classifications, end_bit]) - - return all_detections.detach().cpu().numpy(), all_regressions.detach().cpu().numpy(), all_classifications.detach().cpu().numpy() + + def assemble(d, r, c): + """We use half overlapping windows, need to throw away boundary predictions. 
+ See get_val_dataloader and get_test_dataloader in data.py""" + # assemble detections + beginning_d_bit = d[0,:first_quarter_window_dur_samples] + end_d_bit = d[-1,-last_quarter_window_dur_samples:] + d_clipped = d[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples] + middle_d_bit = torch.reshape(d_clipped, (-1,)) + assembled_d = torch.cat([beginning_d_bit, middle_d_bit, end_d_bit]) + + # assemble regressions + beginning_r_bit = r[0,:first_quarter_window_dur_samples] + end_r_bit = r[-1,-last_quarter_window_dur_samples:] + r_clipped = r[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples] + middle_r_bit = torch.reshape(r_clipped, (-1,)) + assembled_r = torch.cat([beginning_r_bit, middle_r_bit, end_r_bit]) + + # assemble classifs + beginning_c_bit = c[0,:first_quarter_window_dur_samples, :] + end_c_bit = c[-1,-last_quarter_window_dur_samples:, :] + c_clipped = c[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples,:] + middle_c_bit = torch.reshape(c_clipped, (-1, c_clipped.size(-1))) + assembled_c = torch.cat([beginning_c_bit, middle_c_bit, end_c_bit]) + return assembled_d.detach().cpu().numpy(), assembled_r.detach().cpu().numpy(), assembled_c.detach().cpu().numpy(), + + assembled_dets, assembled_regs, assembled_classifs = assemble(all_detections, all_regressions, all_classifs) + assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs = assemble(all_rev_detections, all_rev_regressions, all_rev_classifs) + return assembled_dets, assembled_regs, assembled_classifs, assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs def generate_features(model, single_clip_dataloader, args, verbose = True): model = model.to(device) model.eval() - + all_features = [] - + if verbose: iterator = tqdm.tqdm(enumerate(single_clip_dataloader), total=len(single_clip_dataloader)) else: iterator = enumerate(single_clip_dataloader) - + with torch.no_grad(): for i, X in iterator: X = X.to(device = device, dtype = torch.float) @@ -185,127 +203,130 @@ def generate_features(model, single_clip_dataloader, args, verbose = True): features = model.generate_features(X) all_features.append(features) all_features = torch.cat(all_features) - - ######## Need better checking that features are the correct dur + + ######## Need better checking that features are the correct dur assert all_features.size(dim=1) % 2 == 0 first_quarter_window_dur_samples=all_features.size(dim=1)//4 last_quarter_window_dur_samples=(all_features.size(dim=1)//2)-first_quarter_window_dur_samples - + # assemble features beginning_bit = all_features[0,:first_quarter_window_dur_samples,:] end_bit = all_features[-1,-last_quarter_window_dur_samples:,:] features_clipped = all_features[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples,:] all_features = torch.reshape(features_clipped, (-1, features_clipped.size(-1))) all_features = torch.cat([beginning_bit, all_features, end_bit]) - + return all_features.detach().cpu().numpy() -def export_to_selection_table(detections, regressions, classifications, fn, args, verbose=True, target_dir=None, detection_threshold = 0.5, classification_threshold = 0): - +#def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=True, target_dir=None, det_threshold=0.5, classif_threshold=0): +def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=True, target_dir=None, classif_threshold=0): + if target_dir is None: - target_dir = args.experiment_output_dir + target_dir = args.experiment_output_dir + if is_rev: + fn 
+= '-rev' # Debugging # -# target_fp = os.path.join(target_dir, f"detections_{fn}.npy") -# np.save(target_fp, detections) - -# target_fp = os.path.join(target_dir, f"regressions_{fn}.npy") -# np.save(target_fp, regressions) - -# target_fp = os.path.join(target_dir, f"classifications_{fn}.npy") -# np.save(target_fp, classifications) - - ## peaks - detection_peaks, properties = find_peaks(detections, height = detection_threshold, distance=args.peak_distance) - detection_probs = properties['peak_heights'] - - ## regressions and classifications +# target_fp = os.path.join(target_dir, f"dets_{fn}.npy") +# np.save(target_fp, dets) + +# target_fp = os.path.join(target_dir, f"regs_{fn}.npy") +# np.save(target_fp, regs) + +# target_fp = os.path.join(target_dir, f"classifs_{fn}.npy") +# np.save(target_fp, classifs) + + ## peaks + det_peaks, properties = find_peaks(dets, height=args.detection_threshold, distance=args.peak_distance) + det_probs = properties['peak_heights'] + + ## regs and classifs durations = [] class_idxs = [] class_probs = [] - - for i in detection_peaks: - dur = regressions[i] + + for i in det_peaks: + dur = regs[i] durations.append(dur) - - c = np.argmax(classifications[i,:]) - p = classifications[i,c] - - if p < classification_threshold: + + c = np.argmax(classifs[i,:]) + p = classifs[i,c] + + if p < classif_threshold: c = -1 - + class_idxs.append(c) class_probs.append(p) - + durations = np.array(durations) class_idxs = np.array(class_idxs) class_probs = np.array(class_probs) - + pred_sr = args.sr // (args.scale_factor * args.prediction_scale_factor) - - bboxes, detection_probs, class_idxs, class_probs = pred2bbox(detection_peaks, detection_probs, durations, class_idxs, class_probs, pred_sr) - + + bboxes, det_probs, class_idxs, class_probs = pred2bbox(det_peaks, det_probs, durations, class_idxs, class_probs, pred_sr, is_rev) + if args.nms == "soft_nms": - bboxes, detection_probs, class_idxs, class_probs = soft_nms(bboxes, detection_probs, class_idxs, class_probs, sigma = args.soft_nms_sigma, thresh = args.detection_threshold) + bboxes, det_probs, class_idxs, class_probs = soft_nms(bboxes, det_probs, class_idxs, class_probs, sigma=args.soft_nms_sigma, thresh=args.detection_threshold) elif args.nms == "nms": - bboxes, detection_probs, class_idxs, class_probs = nms(bboxes, detection_probs, class_idxs, class_probs, iou_thresh = args.nms_thresh) - + bboxes, det_probs, class_idxs, class_probs = nms(bboxes, det_probs, class_idxs, class_probs, iou_thresh=args.nms_thresh) + if verbose: - print(f"Found {len(detection_probs)} boxes") - + print(f"Found {len(det_probs)} boxes") + target_fp = os.path.join(target_dir, f"peaks_pred_{fn}.txt") - - st = bbox2raven(bboxes, class_idxs, args.label_set, detection_probs, class_probs, args.unknown_label) + + st = bbox2raven(bboxes, class_idxs, args.label_set, det_probs, class_probs, args.unknown_label) write_tsv(target_fp, st) - + return target_fp - + def get_metrics(predictions_fp, annotations_fp, args, iou, class_threshold): c = Clip(label_set=args.label_set, unknown_label=args.unknown_label) - + c.load_predictions(predictions_fp) c.threshold_class_predictions(class_threshold) c.load_annotations(annotations_fp, label_mapping = args.label_mapping) - + metrics = {} - + c.compute_matching(IoU_minimum = iou) metrics = c.evaluate() - + return metrics def get_confusion_matrix(predictions_fp, annotations_fp, args, iou, class_threshold): c = Clip(label_set=args.label_set, unknown_label=args.unknown_label) - + c.load_predictions(predictions_fp) 
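+    # predictions whose class prob falls below class_threshold are relabeled as unknown before matching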
c.threshold_class_predictions(class_threshold) c.load_annotations(annotations_fp, label_mapping = args.label_mapping) - + confusion_matrix = {} - + c.compute_matching(IoU_minimum = iou) confusion_matrix, confusion_matrix_labels = c.confusion_matrix() - + return confusion_matrix, confusion_matrix_labels def summarize_metrics(metrics): # metrics (dict) : {fp : fp_metrics} # where # fp_metrics (dict) : {class_label: {'TP': int, 'FP' : int, 'FN' : int}} - + fps = sorted(metrics.keys()) class_labels = sorted(metrics[fps[0]].keys()) - + overall = { l: {'TP' : 0, 'FP' : 0, 'FN' : 0} for l in class_labels} - + for fp in fps: for l in class_labels: counts = metrics[fp][l] overall[l]['TP'] += counts['TP'] overall[l]['FP'] += counts['FP'] overall[l]['FN'] += counts['FN'] - + for l in class_labels: tp = overall[l]['TP'] fp = overall[l]['FP'] @@ -328,16 +349,16 @@ def summarize_metrics(metrics): else: f1 = 2*prec*rec / (prec + rec) overall[l]['f1'] = f1 - + return overall def macro_metrics(summary): # summary (dict) : {class_label: {'f1' : float, 'precision' : float, 'recall' : float, 'TP': int, 'FP' : int, 'FN' : int}} - + metrics = ['f1', 'precision', 'recall'] - + macro = {} - + for metric in metrics: e = [] @@ -345,11 +366,11 @@ def macro_metrics(summary): m = summary[l][metric] e.append(m) macro[metric] = float(np.mean(e)) - + return macro def plot_confusion_matrix(data, label_names, target_dir, name=""): - + fig = plt.figure(num=None, figsize=(12, 8), dpi=80, facecolor='w', edgecolor='k') plt.clf() ax = fig.add_subplot(111) @@ -363,7 +384,7 @@ def plot_confusion_matrix(data, label_names, target_dir, name=""): ax.set_ylabel('Prediction') ax.set_xlabel('Annotation') plt.title(name) - + plt.savefig(os.path.join(target_dir, f"{name}_confusion_matrix.svg")) plt.close() @@ -372,66 +393,79 @@ def summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels): # confusion_matrix (dict) : {fp : fp_cm} # where # fp_cm : numpy array - + fps = sorted(confusion_matrix.keys()) l = len(confusion_matrix_labels) - + overall = np.zeros((l, l)) - + for fp in fps: overall += confusion_matrix[fp] - + return overall, confusion_matrix_labels def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fns = [] predictions_fps = [] + rev_predictions_fps = [] annotations_fps = [] - + for fn in dataloader_dict: - detections, regressions, classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) - - predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, verbose = verbose, detection_threshold = args.detection_threshold) - + detections, regressions, classifications, rev_detections, rev_regressions, rev_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) + + predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, is_rev=False, verbose=verbose) + rev_predictions_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifications, fn, args, is_rev=True, verbose=verbose) + annotations_fp = dataloader_dict[fn].dataset.annot_fp - + fns.append(fn) predictions_fps.append(predictions_fp) + rev_predictions_fps.append(rev_predictions_fp) annotations_fps.append(annotations_fp) - - manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'annotations_fp' : annotations_fps}) + + manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'annotations_fp' : 
annotations_fps}) return manifest - + def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0): - + metrics = {} confusion_matrix = {} - + rev_metrics = {} + rev_confusion_matrix = {} + for i, row in manifest.iterrows(): fn = row['filename'] predictions_fp = row['predictions_fp'] + rev_predictions_fp = row['rev_predictions_fp'] annotations_fp = row['annotations_fp'] - + metrics[fn] = get_metrics(predictions_fp, annotations_fp, args, iou, class_threshold) + rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) confusion_matrix[fn], confusion_matrix_labels = get_confusion_matrix(predictions_fp, annotations_fp, args, iou, class_threshold) - + rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) + if output_dir is not None: if not os.path.exists(output_dir): os.makedirs(output_dir) - + # summarize and save metrics summary = summarize_metrics(metrics) metrics['summary'] = summary macro = macro_metrics(summary) metrics['macro'] = macro + rev_summary = summarize_metrics(rev_metrics) + rev_metrics['summary'] = rev_summary + rev_macro = macro_metrics(rev_summary) + rev_metrics['macro'] = rev_macro if output_dir is not None: metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') with open(metrics_fp, 'w') as f: yaml.dump(metrics, f) - + # summarize and save confusion matrix confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) + rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) if output_dir is not None: - plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") - - return metrics, confusion_matrix_summary + plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") + + return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary diff --git a/voxaboxen/model/model.py b/voxaboxen/model/model.py index b53ee2f..5fe4439 100644 --- a/voxaboxen/model/model.py +++ b/voxaboxen/model/model.py @@ -32,7 +32,7 @@ def forward(self, sig): out = self.model.extract_features(sig)[0][-1] return out - + def freeze(self): for param in self.model.encoder.parameters(): param.requires_grad = False @@ -40,7 +40,7 @@ def freeze(self): def unfreeze(self): for param in self.model.encoder.parameters(): param.requires_grad = True - + class DetectionModel(nn.Module): def __init__(self, args, embedding_dim=768): super().__init__() @@ -48,7 +48,8 @@ def __init__(self, args, embedding_dim=768): self.args = args aves_sr = args.sr // args.scale_factor self.detection_head = DetectionHead(args, embedding_dim = embedding_dim) - + self.rev_detection_head = DetectionHead(args, embedding_dim = embedding_dim) + def forward(self, x): """ Input @@ -59,22 +60,24 @@ def forward(self, x): class_logits (Tensor): (batch, time, n_classes) (time at 50 Hz, aves_sr) """ - + expected_dur_output = math.ceil(x.size(1)/self.args.scale_factor) - + x = x-torch.mean(x,axis=1,keepdim=True) feats = self.encoder(x) - + #aves may be off by 1 sample from expected pad = expected_dur_output - feats.size(1) if pad>0: feats = F.pad(feats, (0,0,0,pad), mode='reflect') - + detection_logits, 
regression, class_logits = self.detection_head(feats) detection_probs = torch.sigmoid(detection_logits) - - return detection_probs, regression, class_logits - + rev_detection_logits, rev_regression, rev_class_logits = self.rev_detection_head(feats) + rev_detection_probs = torch.sigmoid(rev_detection_logits) + + return detection_probs, regression, class_logits, rev_detection_probs, rev_regression, rev_class_logits + def generate_features(self, x): """ Input @@ -82,22 +85,22 @@ def generate_features(self, x): Returns features (Tensor): (batch, time) (time at 50 Hz, aves_sr) """ - + expected_dur_output = math.ceil(x.size(-1)/self.args.scale_factor) - + x = x-torch.mean(x,axis=-1,keepdim=True) feats = self.encoder(x) - + #aves may be off by 1 sample from expected pad = expected_dur_output - feats.size(1) if pad>0: feats = F.pad(feats, (0,0,0,pad), mode='reflect') - + return feats - + def freeze_encoder(self): self.encoder.freeze() - + def unfreeze_encoder(self): self.encoder.unfreeze() @@ -107,7 +110,7 @@ def __init__(self, args, embedding_dim=768): self.n_classes = len(args.label_set) self.head = nn.Conv1d(embedding_dim, 2+self.n_classes, args.prediction_scale_factor, stride=args.prediction_scale_factor, padding=0) self.args=args - + def forward(self, x): """ Input @@ -121,15 +124,15 @@ def forward(self, x): x = rearrange(x, 'b t c -> b c t') x = self.head(x) x = rearrange(x, 'b c t -> b t c') - detection_logits = x[:,:,0] + detection_logits = x[:,:,0] reg = x[:,:,1] class_logits = x[:,:,2:] return detection_logits, reg, class_logits - + class DetectionModelStereo(DetectionModel): def __init__(self, args, embedding_dim=768): super().__init__(args, embedding_dim=2*embedding_dim) - + def forward(self, x): """ Input @@ -140,9 +143,9 @@ def forward(self, x): class_logits (Tensor): (batch, time, n_classes) (time at 50 Hz, aves_sr) """ - + expected_dur_output = math.ceil(x.size(-1)/self.args.scale_factor) - + x = x-torch.mean(x,axis=-1,keepdim=True) feats0 = self.encoder(x[:,0,:]) feats1 = self.encoder(x[:,1,:]) @@ -152,12 +155,12 @@ def forward(self, x): pad = expected_dur_output - feats.size(1) if pad>0: feats = F.pad(feats, (0,0,0,pad), mode='reflect') - + detection_logits, regression, class_logits = self.detection_head(feats) detection_probs = torch.sigmoid(detection_logits) - + return detection_probs, regression, class_logits - + def rms_and_mixup(X, d, r, y, train, args): if args.rms_norm: @@ -165,31 +168,31 @@ def rms_and_mixup(X, d, r, y, train, args): ms = ms + torch.full_like(ms, 1e-6) rms = ms ** (-1/2) X = X * rms - + if args.mixup and train: # TODO: For mixup, add in a check that there aren't extremely overlapping vocs - + batch_size = X.size(0) - + mask = torch.full((X.size(0),1,1), 0.5, device=X.device) mask = torch.bernoulli(mask) - + if len(X.size()) == 2: X_aug = torch.flip(X, (0,)) * mask[:,:,0] elif len(X.size()) == 3: X_aug = torch.flip(X, (0,)) * mask - + d_aug = torch.flip(d, (0,)) * mask[:,:,0] r_aug = torch.flip(r, (0,)) * mask[:,:,0] y_aug = torch.flip(y, (0,)) * mask - + X = (X + X_aug)[:batch_size//2,...] d = torch.maximum(d, d_aug)[:batch_size//2,...] r = torch.maximum(r, r_aug)[:batch_size//2,...] y = torch.maximum(y, y_aug)[:batch_size//2,...] 
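+        # mixed pairs keep events from both clips: targets combine via elementwise max, and the batch is halved since clips were summed in pairs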
- + if args.rms_norm: X = X * (1/2) - + return X, d, r, y - + diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index 4c01610..6af6ccd 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -8,10 +8,11 @@ def parse_args(args,allow_unknown=False): parser = argparse.ArgumentParser() - + # General parser.add_argument('--name', type = str, required=True) parser.add_argument('--seed', type=int, default=0) + parser.add_argument('--is_test', '-t', action='store_true') # Data parser.add_argument('--project-config-fp', type = str, required=True) @@ -19,7 +20,7 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--clip-hop', type=float, default=None, help = "clip hop, in seconds. If None, automatically set to be half clip duration. Used only during training; clip hop is automatically set to be 1/2 clip duration for inference") parser.add_argument('--train-info-fp', type=str, required=False, help = "train info, to override project train info") parser.add_argument('--num-workers', type=int, default=8) - + # Model parser.add_argument('--sr', type=int, default=16000) parser.add_argument('--scale-factor', type=int, default = 320, help = "downscaling performed by aves") @@ -33,11 +34,11 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--stereo', action='store_true', help="If passed, will process stereo data as stereo") # Training - parser.add_argument('--batch-size', type=int, default=32) - parser.add_argument('--lr', type=float, default=.00005) + parser.add_argument('--batch-size', type=int, default=32) + parser.add_argument('--lr', type=float, default=.00005) parser.add_argument('--n-epochs', type=int, default=50) parser.add_argument('--unfreeze-encoder-epoch', type=int, default=3) - parser.add_argument('--end-mask-perc', type=float, default = 0.1, help="During training, mask loss from a percentage of the frames on each end of the clip") + parser.add_argument('--end-mask-perc', type=float, default = 0.1, help="During training, mask loss from a percentage of the frames on each end of the clip") parser.add_argument('--omit-empty-clip-prob', type=float, default=0, help="if a clip has no annotations, do not use for training with this probability") parser.add_argument('--lamb', type=float, default=.04, help="parameter controlling strength regression loss") parser.add_argument('--rho', type=float, default = .01, help="parameter controlling strength of classification loss") @@ -47,31 +48,31 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--early-stopping', action ="store_true", help="Whether to use early stopping based on val performance") parser.add_argument('--pos-loss-weight', type=float, default=1, help="Weights positive component of loss") - + # Augmentations - parser.add_argument('--amp-aug', action ="store_true", help="Whether to use amplitude augmentation") - parser.add_argument('--amp-aug-low-r', type=float, default = 0.8) - parser.add_argument('--amp-aug-high-r', type=float, default = 1.2) - parser.add_argument('--mixup', action ="store_true", help="Whether to use mixup augmentation") - + parser.add_argument('--amp-aug', action ="store_true", help="Whether to use amplitude augmentation") + parser.add_argument('--amp-aug-low-r', type=float, default = 0.8) + parser.add_argument('--amp-aug-high-r', type=float, default = 1.2) + parser.add_argument('--mixup', action ="store_true", help="Whether to use mixup augmentation") + # Inference parser.add_argument('--peak-distance', type=float, default=5, help="for 
finding peaks in detection probability, what radius to use for detecting local maxima. In output frame rate.") parser.add_argument('--nms', type = str, default='soft_nms', choices = ['none', 'nms', 'soft_nms'], help="Whether to apply additional nms after finding peaks") parser.add_argument('--soft-nms-sigma', type = float, default = 0.5) parser.add_argument('--soft-nms-thresh', type = float, default = 0.001) parser.add_argument('--nms-thresh', type = float, default = 0.5) - + if allow_unknown: args, remaining = parser.parse_known_args(args) else: args = parser.parse_args(args) - + args = read_config(args) check_config(args) if args.clip_hop is None: setattr(args, "clip_hop", args.clip_duration/2) - + if allow_unknown: return args, remaining else: @@ -80,10 +81,10 @@ def parse_args(args,allow_unknown=False): def read_config(args): with open(args.project_config_fp, 'r') as f: project_config = yaml.safe_load(f) - + for key in project_config: setattr(args,key,project_config[key]) - + return args def set_seed(seed): @@ -104,7 +105,7 @@ def save_params(args): with open(params_file, "w") as f: yaml.dump(args_dict, f) - + def load_params(fp): with open(fp, 'r') as f: args_dict = yaml.safe_load(f) @@ -118,4 +119,4 @@ def load_params(fp): def check_config(args): assert args.end_mask_perc < 0.25, "Masking above 25% of each end during training will interfere with inference" - assert ((args.clip_duration * args.sr)/(4*args.scale_factor)).is_integer(), "Must pick clip duration to ensure no rounding errors during inference" \ No newline at end of file + assert ((args.clip_duration * args.sr)/(4*args.scale_factor)).is_integer(), "Must pick clip duration to ensure no rounding errors during inference" diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index 5bb7987..6a230c4 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -21,25 +21,25 @@ def train(model, args): model = model.to(device) - + if args.previous_checkpoint_fp is not None: print(f"loading model weights from {args.previous_checkpoint_fp}") cp = torch.load(args.previous_checkpoint_fp) model.load_state_dict(cp["model_state_dict"]) - + detection_loss_fn = modified_focal_loss reg_loss_fn = get_reg_loss_fn(args) - + class_loss_fn = get_class_loss_fn(args) - + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, amsgrad = True) # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.step_size, gamma=0.1, last_epoch=- 1, verbose=False) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.n_epochs, eta_min=0, last_epoch=- 1, verbose=False) - + train_evals = [] learning_rates = [] val_evals = [] - + if args.early_stopping: assert args.val_info_fp is not None best_f1 = 0 @@ -49,24 +49,25 @@ def train(model, args): use_val = True else: use_val = False - + for t in range(args.n_epochs): print(f"Epoch {t}\n-------------------------------") train_dataloader = get_train_dataloader(args, random_seed_shift = t) # reinitialize dataloader with different negatives each epoch model, train_eval = train_epoch(model, t, train_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, optimizer, args) train_evals.append(train_eval.copy()) learning_rates.append(optimizer.param_groups[0]["lr"]) - + train_evals_by_epoch = {i : e for i, e in enumerate(train_evals)} train_evals_fp = os.path.join(args.experiment_dir, "train_history.yaml") with open(train_evals_fp, 'w') as f: yaml.dump(train_evals_by_epoch, f) - + if use_val: - val_eval = val_epoch(model, t, val_dataloader, detection_loss_fn, 
reg_loss_fn, class_loss_fn, args) + val_eval, rev_eval = val_epoch(model, t, val_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args) + # TODO: maybe plot rev-evals val_evals.append(val_eval.copy()) plot_eval(train_evals, learning_rates, args, val_evals = val_evals) - + val_evals_by_epoch = {i : e for i, e in enumerate(val_evals)} val_evals_fp = os.path.join(args.experiment_dir, "val_history.yaml") with open(val_evals_fp, 'w') as f: @@ -74,7 +75,7 @@ def train(model, args): else: plot_eval(train_evals, learning_rates, args) scheduler.step() - + if use_val and args.early_stopping: current_f1 = val_eval['f1'] if current_f1 > best_f1: @@ -89,13 +90,13 @@ def train(model, args): "train_evals": train_evals, "val_evals" : val_evals } - + torch.save( checkpoint_dict, os.path.join(args.experiment_dir, f"model.pt"), - ) - - else: + ) + + else: checkpoint_dict = { "epoch": t, "model_state_dict": model.state_dict(), @@ -104,110 +105,155 @@ def train(model, args): "train_evals": train_evals, "val_evals" : val_evals } - + torch.save( checkpoint_dict, os.path.join(args.experiment_dir, f"model.pt"), - ) - - + ) + + print("Done!") - + cp = torch.load(os.path.join(args.experiment_dir, f"model.pt")) model.load_state_dict(cp["model_state_dict"]) - + # resave validation with best model if use_val: val_epoch(model, t+1, val_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args) - - return model - + + return model + +def lf(dets, det_preds, regs, reg_preds, y, y_preds, args, reg_loss_fn): + end_mask_perc = args.end_mask_perc + end_mask_dur = int(det_preds.size(1)*end_mask_perc) + + det_preds_clipped = det_preds[:,end_mask_dur:-end_mask_dur] + dets_clipped = dets[:,end_mask_dur:-end_mask_dur] + + reg_preds_clipped = reg_preds[:,end_mask_dur:-end_mask_dur] + regs_clipped = regs[:,end_mask_dur:-end_mask_dur] + + #y_preds_clipped = y_preds[:,end_mask_dur:-end_mask_dur,:] + y_clipped = y[:,end_mask_dur:-end_mask_dur,:] + + detection_loss = modified_focal_loss(det_preds_clipped, dets_clipped, pos_loss_weight=args.pos_loss_weight) + reg_loss = reg_loss_fn(reg_preds_clipped, regs_clipped, dets_clipped, y_clipped) + #class_loss = class_loss_fn(y_preds_clipped, y_clipped, dets_clipped) + class_loss = torch.tensor(0) + return detection_loss, reg_loss, class_loss + def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, optimizer, args): model.train() if t < args.unfreeze_encoder_epoch: model.freeze_encoder() else: model.unfreeze_encoder() - - + + evals = {} - train_loss = 0; losses = []; detection_losses = []; regression_losses = []; class_losses = [] + normal_train_loss = 0; normal_losses = []; detection_losses = []; regression_losses = []; class_losses = [] + rev_train_loss = 0; rev_losses = []; rev_detection_losses = []; rev_regression_losses = []; rev_class_losses = [] + train_loss = 0; losses = [] data_iterator = tqdm.tqdm(dataloader) - for i, (X, d, r, y) in enumerate(data_iterator): + for i, (X, d, r, y, rev_d, rev_r, rev_y) in enumerate(data_iterator): num_batches_seen = i X = X.to(device = device, dtype = torch.float) d = d.to(device = device, dtype = torch.float) r = r.to(device = device, dtype = torch.float) y = y.to(device = device, dtype = torch.float) - + rev_d = rev_d.to(device = device, dtype = torch.float) + rev_r = rev_r.to(device = device, dtype = torch.float) + rev_y = rev_y.to(device = device, dtype = torch.float) + X, d, r, y = rms_and_mixup(X, d, r, y, True, args) - probs, regression, class_logits = model(X) - + _, rev_d, rev_r, rev_y = 
rms_and_mixup(X, rev_d, rev_r, rev_y, True, args)
+        probs, regression, class_logits, rev_probs, rev_regression, rev_class_logits = model(X)
+
         # We mask out loss from each end of the clip, so the model isn't forced to learn to detect events that are partially cut off.
         # This does not affect inference, because during inference we overlap clips at 50%
-
-        end_mask_perc = args.end_mask_perc
-        end_mask_dur = int(probs.size(1)*end_mask_perc)
-
-        d_clipped = d[:,end_mask_dur:-end_mask_dur]
-        probs_clipped = probs[:,end_mask_dur:-end_mask_dur]
-
-        regression_clipped = regression[:,end_mask_dur:-end_mask_dur]
-        r_clipped = r[:,end_mask_dur:-end_mask_dur]
-
-        class_logits_clipped = class_logits[:,end_mask_dur:-end_mask_dur,:]
-        y_clipped = y[:,end_mask_dur:-end_mask_dur,:]
-
-        detection_loss = detection_loss_fn(probs_clipped, d_clipped, pos_loss_weight = args.pos_loss_weight)
-        reg_loss = reg_loss_fn(regression_clipped, r_clipped, d_clipped, y_clipped)
-        class_loss = class_loss_fn(class_logits_clipped, y_clipped, d_clipped)
-
-        loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss
+
+        # lf expects ground-truth targets before predictions in its (y, y_preds) slots
+        detection_loss, reg_loss, class_loss = lf(d, probs, r, regression, y, class_logits, args=args, reg_loss_fn=reg_loss_fn)
+        rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_y, rev_class_logits, args=args, reg_loss_fn=reg_loss_fn)
+        normal_loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss
+        rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss
+        loss = (normal_loss + rev_loss)/2
         train_loss += loss.item()
+        rev_train_loss += rev_loss.item()
+        normal_train_loss += normal_loss.item()
+        normal_losses.append(normal_loss.item())
+        rev_losses.append(rev_loss.item())
         losses.append(loss.item())
         detection_losses.append(detection_loss.item())
         regression_losses.append(args.lamb * reg_loss.item())
         class_losses.append(args.rho * class_loss.item())
-
+        rev_detection_losses.append(rev_detection_loss.item())
+        rev_regression_losses.append(args.lamb * rev_reg_loss.item())
+        rev_class_losses.append(args.rho * rev_class_loss.item())
+
         # Backpropagation
         optimizer.zero_grad()
         loss.backward()
-
+
         optimizer.step()
         if i > 10:
-            data_iterator.set_description(f"Loss {np.mean(losses[-10:]):.7f}, Detection Loss {np.mean(detection_losses[-10:]):.7f}, Regression Loss {np.mean(regression_losses[-10:]):.7f}, Classification Loss {np.mean(class_losses[-10:]):.7f}")
-
+            data_iterator.set_description(f"loss {np.mean(losses[-10:]):.6f}, det {np.mean(detection_losses[-10:]):.6f}, reg {np.mean(regression_losses[-10:]):.6f}, class {np.mean(class_losses[-10:]):.6f} revloss {np.mean(rev_losses[-10:]):.6f}, revdet {np.mean(rev_detection_losses[-10:]):.6f}, revreg 
{np.mean(rev_regression_losses[-10:]):.6f}, revclass {np.mean(rev_class_losses[-10:]):.6f}") + + if args.is_test and i == 15: break + train_loss = train_loss / num_batches_seen evals['loss'] = float(train_loss) - + print(f"Epoch {t} | Train loss: {train_loss:1.3f}") return model, evals - + def val_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args): model.eval() - + manifest = predict_and_generate_manifest(model, dataloader, args, verbose = False) - e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) - + e, _, rev_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) + summary = e['summary'] - + evals = {k:[] for k in ['precision','recall','f1']} + rev_evals = {k:[] for k in ['precision','recall','f1']} for k in ['precision','recall','f1']: for l in args.label_set: - m = summary[l][k] + m = e['summary'][l][k] + rev_m = rev_e['summary'][l][k] evals[k].append(m) + rev_evals[k].append(rev_m) evals[k] = float(np.mean(evals[k])) - - print(f"Epoch {t} | Val scores @{args.model_selection_iou}IoU: Precision: {evals['precision']:1.3f} Recall: {evals['recall']:1.3f} F1: {evals['f1']:1.3f}") - return evals + rev_evals[k] = float(np.mean(rev_evals[k])) + + print(f"Epoch {t} | Val scores @{args.model_selection_iou}IoU: prec: {evals['precision']:1.3f} rec: {evals['recall']:1.3f} F1: {evals['f1']:1.3f} rev_prec: {rev_evals['precision']:1.3f} rev_rec: {rev_evals['recall']:1.3f} rev_F1: {rev_evals['f1']:1.3f}") + return evals, rev_evals def modified_focal_loss(pred, gt, pos_loss_weight = 1): # Modified from https://github.com/xingyizhou/CenterNet/blob/2b7692c377c6686fb35e473dac2de6105eed62c6/src/lib/models/losses.py - ''' + ''' pred [batch, time,] gt [batch, time,] - ''' - + ''' + pos_inds = gt.eq(1).float() neg_inds = gt.lt(1).float() @@ -217,48 +263,48 @@ def modified_focal_loss(pred, gt, pos_loss_weight = 1): pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds * pos_loss_weight neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds - + loss = -1.*(neg_loss + pos_loss) - + loss = loss.mean() return loss - - + + def masked_reg_loss(regression, r, d, y, class_weights = None): # regression, r (Tensor): [batch, time,] # r (Tensor) : [batch, time,], float tensor # d (Tensor) : [batch, time,], float tensor # y (Tensor) : [batch, time, n_classes] # class_weights (Tensor) : [n_classes,] - + reg_loss = F.l1_loss(regression, r, reduction='none') mask = d.eq(1).float() - + reg_loss = reg_loss * mask - + if class_weights is not None: y = rearrange(y, 'b t c -> b c t') high_prob = torch.amax(y, dim = 1) knowns = high_prob.eq(1).float() unknowns = high_prob.lt(1).float() - + reg_loss_unknowns = reg_loss * unknowns - + class_weights = torch.reshape(class_weights, (1, -1, 1)) class_weights = y * class_weights class_weights = torch.amax(class_weights, dim = 1) - + reg_loss_knowns = reg_loss * knowns * class_weights - + reg_loss = reg_loss_unknowns + reg_loss_knowns - + reg_loss = torch.sum(reg_loss) n_pos = mask.sum() - + if n_pos>0: reg_loss = reg_loss / n_pos - + return reg_loss def masked_classification_loss(class_logits, y, d, class_weights = None): @@ -266,41 +312,41 @@ def masked_classification_loss(class_logits, y, d, class_weights = None): # y (Tensor): [batch, 
time,n_classes] # d (Tensor) : [batch, time,], float tensor # class_weight : [n_classes,], float tensor - + class_logits = rearrange(class_logits, 'b t c -> b c t') y = rearrange(y, 'b t c -> b c t') - + high_prob = torch.amax(y, dim = 1) knowns = high_prob.eq(1).float() unknowns = high_prob.lt(1).float() - + mask = d.eq(1).float() # mask out time steps where no event is present - + known_class_loss = F.cross_entropy(class_logits, y, weight=class_weights, reduction='none') known_class_loss = known_class_loss * mask * knowns known_class_loss = torch.sum(known_class_loss) - + unknown_class_loss = F.cross_entropy(class_logits, y, weight=None, reduction='none') unknown_class_loss = unknown_class_loss * mask * unknowns unknown_class_loss = torch.sum(unknown_class_loss) - + class_loss = known_class_loss + unknown_class_loss n_pos = mask.sum() - + if n_pos>0: class_loss = class_loss / n_pos - + return class_loss - + def get_class_loss_fn(args): dataloader_temp = get_train_dataloader(args, random_seed_shift = 0) class_proportions = dataloader_temp.dataset.get_class_proportions() class_weights = 1. / (class_proportions + 1e-6) - + class_weights = (1. / (np.mean(class_weights) + 1e-6)) * class_weights # normalize so average weight = 1 - + print(f"Using class weights {class_weights}") - + class_weights = torch.Tensor(class_weights).to(device) return partial(masked_classification_loss, class_weights = class_weights) @@ -308,9 +354,9 @@ def get_reg_loss_fn(args): dataloader_temp = get_train_dataloader(args, random_seed_shift = 0) class_proportions = dataloader_temp.dataset.get_class_proportions() class_weights = 1. / (class_proportions + 1e-6) - + class_weights = (1. / (np.mean(class_weights) + 1e-6)) * class_weights # normalize so average weight = 1 - + class_weights = torch.Tensor(class_weights).to(device) return partial(masked_reg_loss, class_weights = class_weights) - \ No newline at end of file + diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index ee60a62..a81700a 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -13,36 +13,36 @@ def train_model(args): args = parse_args(args) set_seed(args.seed) - + experiment_dir = os.path.join(args.project_dir, args.name) setattr(args, 'experiment_dir', str(experiment_dir)) if not os.path.exists(args.experiment_dir): os.makedirs(args.experiment_dir) - + experiment_output_dir = os.path.join(experiment_dir, "outputs") setattr(args, 'experiment_output_dir', experiment_output_dir) if not os.path.exists(args.experiment_output_dir): os.makedirs(args.experiment_output_dir) - + save_params(args) if hasattr(args,'stereo') and args.stereo: model = DetectionModelStereo(args) else: model = DetectionModel(args) - + ## Training - trained_model = train(model, args) - + trained_model = train(model, args) + ## Evaluation test_dataloader = get_test_dataloader(args) - + manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) - + for iou in [0.2, 0.5, 0.8]: for class_threshold in [0.0, 0.5, 0.95]: - evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) + metrics, conf_mat, rev_metrics, rev_conf_mat = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) if __name__ == "__main__": train_model(sys.argv[1:]) - + # python main.py --name=debug --lr=0.0001 --n-epochs=6 --clip-duration=4 
--batch-size=100 --omit-empty-clip-prob=0.5 --clip-hop=2

From 059d0474f48542e550671233fda0b640b9a67691 Mon Sep 17 00:00:00 2001
From: Louis Mahon
Date: Tue, 9 Apr 2024 16:58:07 +0100
Subject: [PATCH 02/11] implement combining both directions; seems to be
 working well on MT

---
 voxaboxen/evaluation/evaluation.py  | 64 ++++++++++++++++++++++++++---
 voxaboxen/evaluation/raven_utils.py | 62 ++++++++++++++--------------
 voxaboxen/training/train.py         | 32 ++++++++-------
 voxaboxen/training/train_model.py   |  6 ++-
 4 files changed, 111 insertions(+), 53 deletions(-)

diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py
index 6b3691f..cd00ea7 100644
--- a/voxaboxen/evaluation/evaluation.py
+++ b/voxaboxen/evaluation/evaluation.py
@@ -12,6 +12,7 @@
 from voxaboxen.evaluation.raven_utils import Clip
 from voxaboxen.model.model import rms_and_mixup
 from voxaboxen.evaluation.nms import nms, soft_nms
+plt.switch_backend('agg')

 device = "cuda" if torch.cuda.is_available() else "cpu"

@@ -408,6 +409,7 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True):
     fns = []
     predictions_fps = []
     rev_predictions_fps = []
+    comb_predictions_fps = []
     annotations_fps = []

     for fn in dataloader_dict:
@@ -415,34 +417,79 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True):

         predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, is_rev=False, verbose=verbose)
         rev_predictions_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifications, fn, args, is_rev=True, verbose=verbose)
+        comb_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn)

         annotations_fp = dataloader_dict[fn].dataset.annot_fp

         fns.append(fn)
         predictions_fps.append(predictions_fp)
         rev_predictions_fps.append(rev_predictions_fp)
+        comb_predictions_fps.append(comb_predictions_fp)
         annotations_fps.append(annotations_fp)

-    manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'annotations_fp' : annotations_fps})
+    manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'annotations_fp' : annotations_fps})
     return manifest

-def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0):
+def combine_fwd_bck_preds(target_dir, fn):
+    # match forward and backward boxes at IoU 0.5; matched pairs keep the forward onset
+    # and backward offset, and detection probs combine as 1-(1-p_fwd)*(1-p_bck)
+    fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}.txt')
+    bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-rev.txt')
+    comb_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-comb.txt')
+    fwd_preds = pd.read_csv(fwd_preds_fp, sep='\t')
+    bck_preds = pd.read_csv(bck_preds_fp, sep='\t')
+
+    c = Clip()
+    c.load_annotations(fwd_preds_fp)
+    c.load_predictions(bck_preds_fp)
+    c.compute_matching(IoU_minimum=0.5)
+    match_preds_list = []
+    for fp, bp in c.matching:
+        match_pred = fwd_preds.loc[fp].copy()
+        bck_pred = bck_preds.iloc[bp]
+        bp_end_time = bck_pred['End Time (s)']
+        match_pred['End Time (s)'] = bp_end_time
+        match_pred['Detection Prob'] = 1 - (1-match_pred['Detection Prob'])*(1-bck_pred['Detection Prob'])
+        match_preds_list.append(match_pred)
+
+    match_preds = pd.DataFrame(match_preds_list)
+    # Now include the union of all that weren't matched
+    fwd_matched_idxs = [m[0] for m in c.matching]
+    bck_matched_idxs = [m[1] for m in c.matching]
+    fwd_unmatched = select_from_neg_idxs(fwd_preds, fwd_matched_idxs)
+    
+ #comb_preds = fwd_preds.copy() + match_preds_list = [] + for fp, bp in c.matching: + match_pred = fwd_preds.loc[fp].copy() + bck_pred = bck_preds.iloc[bp] + bp_end_time = bck_pred['End Time (s)'] + match_pred['End Time (s)'] = bp_end_time + match_pred['Detection Prob'] = 1 - (1-match_pred['Detection Prob'])*(1-bck_pred['Detection Prob']) + match_preds_list.append(match_pred) + + match_preds = pd.DataFrame(match_preds_list) + # Now include the union of all that weren't matched + fwd_matched_idxs = [m[0] for m in c.matching] + bck_matched_idxs = [m[1] for m in c.matching] + fwd_unmatched = select_from_neg_idxs(fwd_preds, fwd_matched_idxs) + bck_unmatched = select_from_neg_idxs(bck_preds, bck_matched_idxs) + comb_preds = pd.concat([match_preds, fwd_unmatched, bck_unmatched]) + assert len(comb_preds) == len(fwd_preds) + len(bck_preds) - len(c.matching) + comb_preds = comb_preds.sort_values('Begin Time (s)') + comb_preds.index = list(range(len(comb_preds))) + + comb_preds.to_csv(comb_preds_fp, sep='\t', index=False) + return comb_preds_fp + +def select_from_neg_idxs(df, neg_idxs): + bool_mask = [i not in neg_idxs for i in range(len(df))] + return df.loc[bool_mask] +def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0): metrics = {} confusion_matrix = {} rev_metrics = {} rev_confusion_matrix = {} + comb_metrics = {} + comb_confusion_matrix = {} for i, row in manifest.iterrows(): fn = row['filename'] predictions_fp = row['predictions_fp'] rev_predictions_fp = row['rev_predictions_fp'] + comb_predictions_fp = row['comb_predictions_fp'] annotations_fp = row['annotations_fp'] metrics[fn] = get_metrics(predictions_fp, annotations_fp, args, iou, class_threshold) - rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) confusion_matrix[fn], confusion_matrix_labels = get_confusion_matrix(predictions_fp, annotations_fp, args, iou, class_threshold) + rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) + comb_metrics[fn] = get_metrics(comb_predictions_fp, annotations_fp, args, iou, class_threshold) + comb_confusion_matrix[fn], comb_confusion_matrix_labels = get_confusion_matrix(comb_predictions_fp, annotations_fp, args, iou, class_threshold) if output_dir is not None: if not os.path.exists(output_dir): @@ -457,6 +504,10 @@ def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, cla rev_metrics['summary'] = rev_summary rev_macro = macro_metrics(rev_summary) rev_metrics['macro'] = rev_macro + comb_summary = summarize_metrics(comb_metrics) + comb_metrics['summary'] = comb_summary + comb_macro = macro_metrics(comb_summary) + comb_metrics['macro'] = comb_macro if output_dir is not None: metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') with open(metrics_fp, 'w') as f: @@ -465,7 +516,8 @@ def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, cla # summarize and save confusion matrix confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) - if output_dir is not None: - plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") + comb_confusion_matrix_summary, comb_confusion_matrix_labels = summarize_confusion_matrix(comb_confusion_matrix, comb_confusion_matrix_labels) + #if output_dir is not None: + #plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") - return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary + return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary, comb_metrics, comb_confusion_matrix_summary
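Note on the 'Detection Prob' fusion in combine_fwd_bck_preds above: it is a noisy-OR rule, which treats the forward and backward detectors as independent, so a matched event is scored as missed only if both directions miss it. A minimal standalone sketch:

def noisy_or(p_fwd, p_bck):
    # probability that at least one of two independent detectors fires
    return 1 - (1 - p_fwd) * (1 - p_bck)

print(noisy_or(0.6, 0.5))  # 0.8 -- the combined score is never lower than either input

diff --git a/voxaboxen/evaluation/raven_utils.py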
b/voxaboxen/evaluation/raven_utils.py index 7178452..af4fea6 100644 --- a/voxaboxen/evaluation/raven_utils.py +++ b/voxaboxen/evaluation/raven_utils.py @@ -20,58 +20,58 @@ def __init__(self, label_set = None, unknown_label = None): self.matched_predictions = None self.label_set = label_set self.unknown_label = unknown_label - + def load_selection_table(self, fp, view = None, label_mapping = None): # view (str) : If applicable, Waveform or Spectrogram to avoid double counting # label_mapping : dict {old label : new label}. If not None, will drop annotations not in keys of label_mapping - - + + annotations = pd.read_csv(fp, delimiter = '\t') if view is None and 'View' in annotations: views = annotations['View'].unique() if len(views)>1: warnings.warn(f"I found more than one view in selection table. To avoid double counting, pass view as a parameter. Views found: {view}") - + if view is not None: annotations = annotations[annotations['View'].str.contains('Waveform')].reset_index() - + if label_mapping is not None: annotations['Annotation'] = annotations['Annotation'].map(label_mapping) annotations = annotations[~pd.isnull(annotations['Annotation'])] - + return annotations - + def load_audio(self, fp): self.samples, self.sr = librosa.load(fp, sr = None) self.duration = len(self.samples) / self.sr - + def play_audio(self, start_sec, end_sec): start_sample = int(self.sr * start_sec) end_sample = int(self.sr *end_sec) display(ipd.Audio(self.samples[start_sample:end_sample], rate = self.sr)) - + def load_annotations(self, fp, view = None, label_mapping = None): self.annotations = self.load_selection_table(fp, view = view, label_mapping = label_mapping) self.annotations['index'] = self.annotations.index - + def threshold_class_predictions(self, class_threshold): # If class probability is below a threshold, switch label to unknown - + assert self.unknown_label is not None for i in self.predictions.index: if self.predictions.loc[i, 'Class Prob'] < class_threshold: - self.predictions.at[i, 'Annotation'] = self.unknown_label - + self.predictions.at[i, 'Annotation'] = self.unknown_label + def refine_annotations(self): print("Not implemented! Could implement refining annotations by SNR to remove quiet vocs") - + def refine_predictions(self): print("Not implemented! 
Could implement refining predictions by SNR to remove quiet vocs") - + def load_predictions(self, fp, view = None, label_mapping = None): self.predictions = self.load_selection_table(fp, view = view, label_mapping = label_mapping) self.predictions['index'] = self.predictions.index - + def compute_matching(self, IoU_minimum = 0.5): # Bipartite graph matching between predictions and annotations # Maximizes the number of matchings with IoU > IoU_minimum @@ -81,15 +81,15 @@ def compute_matching(self, IoU_minimum = 0.5): self.matching = metrics.match_events(ref, est, min_iou=IoU_minimum, method="fast") self.matched_annotations = [p[0] for p in self.matching] self.matched_predictions = [p[1] for p in self.matching] - - def evaluate(self): - + + def evaluate(self): + if self.label_set is None: TP = len(self.matching) FP = len(self.predictions) - TP FN = len(self.annotations) - TP return {'all' : {'TP' : TP, 'FP' : FP, 'FN' : FN}} - + else: out = {label : {'TP':0, 'FP':0, 'FN' : 0} for label in self.label_set} pred_label = np.array(self.predictions['Annotation']) @@ -97,22 +97,22 @@ def evaluate(self): for p in self.matching: annotation = annot_label[p[0]] prediction = pred_label[p[1]] - + if self.unknown_label is not None and prediction == self.unknown_label: pass # treat predicted unknowns as no predictions for these metrics elif annotation == prediction: out[annotation]['TP'] += 1 elif self.unknown_label is not None and annotation == self.unknown_label: out[prediction]['FP'] -= 1 #adjust FP for unknown labels - + for label in self.label_set: n_annot = int((annot_label == label).sum()) n_pred = int((pred_label == label).sum()) out[label]['FP'] = out[label]['FP'] + n_pred - out[label]['TP'] out[label]['FN'] = out[label]['FN'] + n_annot - out[label]['TP'] - + return out - + def confusion_matrix(self): if self.label_set is None: return None @@ -125,10 +125,10 @@ def confusion_matrix(self): confusion_matrix = np.zeros((confusion_matrix_size, confusion_matrix_size)) cm_nobox_idx = confusion_matrix_labels.index('None') - + pred_label = np.array(self.predictions['Annotation']) annot_label = np.array(self.annotations['Annotation']) - + for p in self.matching: annotation = annot_label[p[0]] prediction = pred_label[p[1]] @@ -139,21 +139,21 @@ def confusion_matrix(self): for label in confusion_matrix_labels: if label == 'None': continue - + # count false positive and false negative detections, regardless of class cm_label_idx = confusion_matrix_labels.index(label) - + #fp n_pred = int((pred_label == label).sum()) n_positive_detections_row = confusion_matrix.sum(1)[cm_label_idx] n_false_detections = n_pred - n_positive_detections_row confusion_matrix[cm_label_idx, cm_nobox_idx] = n_false_detections - + #fn n_annot = int((annot_label == label).sum()) n_positive_detections_col = confusion_matrix.sum(0)[cm_label_idx] n_missed_detections = n_annot - n_positive_detections_col confusion_matrix[cm_nobox_idx, cm_label_idx] = n_missed_detections - + return confusion_matrix, confusion_matrix_labels - \ No newline at end of file + diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index 6a230c4..79520d4 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -63,10 +63,10 @@ def train(model, args): yaml.dump(train_evals_by_epoch, f) if use_val: - val_eval, rev_eval = val_epoch(model, t, val_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args) + eval_scores, rev_eval_scores, comb_eval_scores = val_epoch(model, t, val_dataloader, args) # TODO: maybe plot rev-evals - 
val_evals.append(val_eval.copy()) - plot_eval(train_evals, learning_rates, args, val_evals = val_evals) + val_evals.append(comb_eval_scores.copy()) + plot_eval(train_evals, learning_rates, args, val_evals=val_evals) val_evals_by_epoch = {i : e for i, e in enumerate(val_evals)} val_evals_fp = os.path.join(args.experiment_dir, "val_history.yaml") @@ -77,7 +77,7 @@ def train(model, args): scheduler.step() if use_val and args.early_stopping: - current_f1 = val_eval['f1'] + current_f1 = comb_eval_scores['f1'] if current_f1 > best_f1: print('found new best model') best_f1 = current_f1 @@ -93,7 +93,7 @@ def train(model, args): torch.save( checkpoint_dict, - os.path.join(args.experiment_dir, f"model.pt"), + os.path.join(args.experiment_dir, "model.pt"), ) else: @@ -108,18 +108,18 @@ def train(model, args): torch.save( checkpoint_dict, - os.path.join(args.experiment_dir, f"model.pt"), + os.path.join(args.experiment_dir, "model.pt"), ) print("Done!") - cp = torch.load(os.path.join(args.experiment_dir, f"model.pt")) + cp = torch.load(os.path.join(args.experiment_dir, "model.pt")) model.load_state_dict(cp["model_state_dict"]) # resave validation with best model if use_val: - val_epoch(model, t+1, val_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args) + val_epoch(model, t+1, val_dataloader, args) return model @@ -215,7 +215,7 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss optimizer.step() if i > 10: - data_iterator.set_description(f"loss {np.mean(losses[-10:]):.6f}, det {np.mean(detection_losses[-10:]):.6f}, reg {np.mean(regression_losses[-10:]):.6f}, class {np.mean(class_losses[-10:]):.6f} revloss {np.mean(rev_losses[-10:]):.6f}, revdet {np.mean(rev_detection_losses[-10:]):.6f}, revreg {np.mean(rev_regression_losses[-10:]):.6f}, revclass {np.mean(rev_class_losses[-10:]):.6f}") + data_iterator.set_description(f"loss {np.mean(losses[-10:]):.5f}, det {np.mean(detection_losses[-10:]):.5f}, reg {np.mean(regression_losses[-10:]):.5f}, class {np.mean(class_losses[-10:]):.5f} revloss {np.mean(rev_losses[-10:]):.5f}, revdet {np.mean(rev_detection_losses[-10:]):.5f}, revreg {np.mean(rev_regression_losses[-10:]):.5f}, revclass {np.mean(rev_class_losses[-10:]):.5f}") if args.is_test and i == 15: break @@ -225,27 +225,29 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss print(f"Epoch {t} | Train loss: {train_loss:1.3f}") return model, evals -def val_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args): +def val_epoch(model, t, dataloader, args): model.eval() manifest = predict_and_generate_manifest(model, dataloader, args, verbose = False) - e, _, rev_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) - - summary = e['summary'] + e, _, rev_e, _, comb_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) evals = {k:[] for k in ['precision','recall','f1']} rev_evals = {k:[] for k in ['precision','recall','f1']} + comb_evals = {k:[] for k in ['precision','recall','f1']} for k in ['precision','recall','f1']: for l in args.label_set: m = e['summary'][l][k] rev_m = rev_e['summary'][l][k] + comb_m = comb_e['summary'][l][k] evals[k].append(m) rev_evals[k].append(rev_m) + comb_evals[k].append(comb_m) evals[k] = 
float(np.mean(evals[k])) rev_evals[k] = float(np.mean(rev_evals[k])) + comb_evals[k] = float(np.mean(comb_evals[k])) - print(f"Epoch {t} | Val scores @{args.model_selection_iou}IoU: prec: {evals['precision']:1.3f} rec: {evals['recall']:1.3f} F1: {evals['f1']:1.3f} rev_prec: {rev_evals['precision']:1.3f} rev_rec: {rev_evals['recall']:1.3f} rev_F1: {rev_evals['f1']:1.3f}") - return evals, rev_evals + print(f"Epoch {t} | val@{args.model_selection_iou}IoU: prec: {evals['precision']:1.3f} rec: {evals['recall']:1.3f} F1: {evals['f1']:1.3f} revprec: {rev_evals['precision']:1.3f} revrec: {rev_evals['recall']:1.3f} revF1: {rev_evals['f1']:1.3f} combprec: {comb_evals['precision']:1.3f} combrec: {comb_evals['recall']:1.3f} combF1: {comb_evals['f1']:1.3f}") + return evals, rev_evals, comb_evals def modified_focal_loss(pred, gt, pos_loss_weight = 1): # Modified from https://github.com/xingyizhou/CenterNet/blob/2b7692c377c6686fb35e473dac2de6105eed62c6/src/lib/models/losses.py diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index a81700a..0a40039 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -40,7 +40,11 @@ def train_model(args): for iou in [0.2, 0.5, 0.8]: for class_threshold in [0.0, 0.5, 0.95]: - metrics, conf_mat, rev_metrics, rev_conf_mat = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) + metrics, conf_mat, rev_metrics, rev_conf_mat, comb_metrics, comb_conf_mat = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) + print(f'IOU: {iou} class_thresh: {class_threshold}') + print('Fwd:', metrics['summary']) + print('Bck:', rev_metrics['summary']) + print('Comb:', comb_metrics['summary'], '\n') if __name__ == "__main__": train_model(sys.argv[1:]) From 289aa3e97830cb7d28cd7c3b4d1452a918e006f6 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Tue, 9 Apr 2024 18:17:09 +0100 Subject: [PATCH 03/11] add comb_thresh, but haven't tested yet that it's doing exactly what it should --- voxaboxen/evaluation/evaluation.py | 5 +++-- voxaboxen/training/params.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index cd00ea7..c176c76 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -417,7 +417,7 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, is_rev=False, verbose=verbose) rev_predictions_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifications, fn, args, is_rev=True, verbose=verbose) - comb_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn) + comb_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn, discard_threshold=args.comb_threshold) annotations_fp = dataloader_dict[fn].dataset.annot_fp @@ -430,7 +430,7 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'annotations_fp' : annotations_fps}) return manifest -def combine_fwd_bck_preds(target_dir, fn): +def combine_fwd_bck_preds(target_dir, fn, discard_threshold): 
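    # Match forward and backward boxes by IoU; fuse each matched pair (onset from the forward box, offset from the backward box, noisy-OR of the two probs); keep unmatched boxes from both directions; finally drop combined detections whose probability is below discard_threshold.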
fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}.txt') bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-rev.txt') comb_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-comb.txt') @@ -459,6 +459,7 @@ def combine_fwd_bck_preds(target_dir, fn): bck_unmatched = select_from_neg_idxs(bck_preds, bck_matched_idxs) comb_preds = pd.concat([match_preds, fwd_unmatched, bck_unmatched]) assert len(comb_preds) == len(fwd_preds) + len(bck_preds) - len(c.matching) + comb_preds = comb_preds.loc[comb_preds['Detection Prob']>discard_threshold] comb_preds.sort_values('Begin Time (s)') comb_preds.index = list(range(len(comb_preds))) diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index 6af6ccd..a4775df 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -32,6 +32,7 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--previous-checkpoint-fp', type=str, default=None, help="path to checkpoint of previously trained detection model") parser.add_argument('--aves-url', type=str, default = "https://storage.googleapis.com/esp-public-files/ported_aves/aves-base-bio.torchaudio.pt") parser.add_argument('--stereo', action='store_true', help="If passed, will process stereo data as stereo") + parser.add_argument('--comb-threshold', type=float, default=0.75, help="discard combined detections whose prob is below this threshold") # Training parser.add_argument('--batch-size', type=int, default=32) From ca0622d444983de05a0147fbe45a7a83192c1931 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Sun, 14 Apr 2024 12:59:57 +0100 Subject: [PATCH 04/11] track F1 of matched-preds, and refactor to loop through different pred types --- voxaboxen/evaluation/evaluation.py | 117 ++++++++++++++++------------- voxaboxen/training/train.py | 54 +++++++------ voxaboxen/training/train_model.py | 10 +-- 3 files changed, 103 insertions(+), 78 deletions(-) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index c176c76..cb09eac 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -144,6 +144,9 @@ def generate_predictions(model, single_clip_dataloader, args, verbose = True): all_rev_regressions.append(rev_regression) all_rev_classifs.append(rev_classif) + if args.is_test and i==15: + break + all_detections = torch.cat(all_detections) all_regressions = torch.cat(all_regressions) all_classifs = torch.cat(all_classifs) @@ -219,14 +222,16 @@ def generate_features(model, single_clip_dataloader, args, verbose = True): return all_features.detach().cpu().numpy() -#def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=True, target_dir=None, det_threshold=0.5, classif_threshold=0): -def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=True, target_dir=None, classif_threshold=0): +#def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, det_threshold=0.5, classif_threshold=0): +def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, classif_threshold=0): if target_dir is None: target_dir = args.experiment_output_dir - if is_rev: - fn += '-rev' + if is_bck: + fn += '-bck' + else: + fn += '-fwd' # Debugging # # target_fp = os.path.join(target_dir, f"dets_{fn}.npy") @@ -266,7 +271,7 @@ def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=Tr pred_sr = args.sr // (args.scale_factor * args.prediction_scale_factor) - bboxes, det_probs, class_idxs, 
class_probs = pred2bbox(det_peaks, det_probs, durations, class_idxs, class_probs, pred_sr, is_rev) + bboxes, det_probs, class_idxs, class_probs = pred2bbox(det_peaks, det_probs, durations, class_idxs, class_probs, pred_sr, is_bck) if args.nms == "soft_nms": bboxes, det_probs, class_idxs, class_probs = soft_nms(bboxes, det_probs, class_idxs, class_probs, sigma=args.soft_nms_sigma, thresh=args.detection_threshold) @@ -407,33 +412,36 @@ def summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels): def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fns = [] - predictions_fps = [] - rev_predictions_fps = [] + fwd_predictions_fps = [] + bck_predictions_fps = [] comb_predictions_fps = [] + match_predictions_fps = [] annotations_fps = [] for fn in dataloader_dict: - detections, regressions, classifications, rev_detections, rev_regressions, rev_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) + fwd_detections, fwd_regressions, fwd_classifications, bck_detections, bck_regressions, bck_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) - predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, is_rev=False, verbose=verbose) - rev_predictions_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifications, fn, args, is_rev=True, verbose=verbose) - comb_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn, discard_threshold=args.comb_threshold) + fwd_predictions_fp = export_to_selection_table(fwd_detections, fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose) + bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose) + comb_predictions_fp, match_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn, discard_threshold=args.comb_threshold) annotations_fp = dataloader_dict[fn].dataset.annot_fp fns.append(fn) - predictions_fps.append(predictions_fp) - rev_predictions_fps.append(rev_predictions_fp) + fwd_predictions_fps.append(fwd_predictions_fp) + bck_predictions_fps.append(bck_predictions_fp) comb_predictions_fps.append(comb_predictions_fp) + match_predictions_fps.append(match_predictions_fp) annotations_fps.append(annotations_fp) - manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'annotations_fp' : annotations_fps}) + manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'match_predictions_fp' : match_predictions_fps, 'annotations_fp' : annotations_fps}) return manifest def combine_fwd_bck_preds(target_dir, fn, discard_threshold): - fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}.txt') - bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-rev.txt') + fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-fwd.txt') + bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-bck.txt') comb_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-comb.txt') + match_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-match.txt') fwd_preds = pd.read_csv(fwd_preds_fp, sep='\t') bck_preds = pd.read_csv(bck_preds_fp, sep='\t') @@ -451,7 +459,7 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): match_pred['Detection Prob'] = 
1 - (1-match_pred['Detection Prob'])*(1-bck_pred['Detection Prob']) match_preds_list.append(match_pred) - match_preds = pd.DataFrame(match_preds_list) + match_preds = pd.DataFrame(match_preds_list, columns=fwd_preds.columns) # Now include the union of all that weren't matched fwd_matched_idxs = [m[0] for m in c.matching] bck_matched_idxs = [m[1] for m in c.matching] @@ -464,61 +472,68 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): comb_preds.index = list(range(len(comb_preds))) comb_preds.to_csv(comb_preds_fp, sep='\t', index=False) - return comb_preds_fp + match_preds.to_csv(match_preds_fp, sep='\t', index=False) + return comb_preds_fp, match_preds_fp def select_from_neg_idxs(df, neg_idxs): bool_mask = [i not in neg_idxs for i in range(len(df))] return df.loc[bool_mask] def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0): - metrics = {} - confusion_matrix = {} - rev_metrics = {} - rev_confusion_matrix = {} - comb_metrics = {} - comb_confusion_matrix = {} + pred_types = ('fwd', 'bck', 'comb', 'match') + metrics = {p:{} for p in pred_types} + conf_mats = {p:{} for p in pred_types} + #conf_mat_labels = {p:{} for p in pred_types} + conf_mat_labels = {} + #rev_metrics = {} + #rev_confusion_matrix = {} + #comb_metrics = {} + #comb_confusion_matrix = {} for i, row in manifest.iterrows(): fn = row['filename'] - predictions_fp = row['predictions_fp'] - rev_predictions_fp = row['rev_predictions_fp'] - comb_predictions_fp = row['comb_predictions_fp'] - annotations_fp = row['annotations_fp'] - - metrics[fn] = get_metrics(predictions_fp, annotations_fp, args, iou, class_threshold) - confusion_matrix[fn], confusion_matrix_labels = get_confusion_matrix(predictions_fp, annotations_fp, args, iou, class_threshold) - rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) - rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) - comb_metrics[fn] = get_metrics(comb_predictions_fp, annotations_fp, args, iou, class_threshold) - comb_confusion_matrix[fn], comb_confusion_matrix_labels = get_confusion_matrix(comb_predictions_fp, annotations_fp, args, iou, class_threshold) + annots_fp = row['annotations_fp'] + for pred_type in pred_types: + preds_fp = row[f'{pred_type}_predictions_fp'] + metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) + conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) + #rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) + #rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) + #comb_metrics[fn] = get_metrics(comb_predictions_fp, annotations_fp, args, iou, class_threshold) + #comb_confusion_matrix[fn], comb_confusion_matrix_labels = get_confusion_matrix(comb_predictions_fp, annotations_fp, args, iou, class_threshold) if output_dir is not None: if not os.path.exists(output_dir): os.makedirs(output_dir) # summarize and save metrics - summary = summarize_metrics(metrics) - metrics['summary'] = summary - macro = macro_metrics(summary) - metrics['macro'] = macro - rev_summary = summarize_metrics(rev_metrics) - rev_metrics['summary'] = rev_summary - rev_macro = macro_metrics(rev_summary) - rev_metrics['macro'] = rev_macro - comb_summary = summarize_metrics(comb_metrics) - 
comb_metrics['summary'] = comb_summary - comb_macro = macro_metrics(comb_summary) - comb_metrics['macro'] = comb_macro + conf_mat_summaries = {} + for pred_type in ('fwd', 'bck', 'comb', 'match'): + summary = summarize_metrics(metrics[pred_type]) + metrics[pred_type]['summary'] = summary + metrics[pred_type]['macro'] = macro_metrics(summary) + conf_mat_summaries[pred_type], confusion_matrix_labels = summarize_confusion_matrix(conf_mats[pred_type], conf_mat_labels[pred_type]) + #macro = macro_metrics(summary) + #metrics['macro'] = macro + #rev_summary = summarize_metrics(rev_metrics) + #rev_metrics['summary'] = rev_summary + #rev_macro = macro_metrics(rev_summary) + #rev_metrics['macro'] = rev_macro + #comb_summary = summarize_metrics(comb_metrics) + #comb_metrics['summary'] = comb_summary + #comb_macro = macro_metrics(comb_summary) + #comb_metrics['macro'] = comb_macro if output_dir is not None: metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') with open(metrics_fp, 'w') as f: yaml.dump(metrics, f) # summarize and save confusion matrix - confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) - rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) - comb_confusion_matrix_summary, comb_confusion_matrix_labels = summarize_confusion_matrix(comb_confusion_matrix, comb_confusion_matrix_labels) + #confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) + #rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) + #comb_confusion_matrix_summary, comb_confusion_matrix_labels = summarize_confusion_matrix(comb_confusion_matrix, comb_confusion_matrix_labels) #if output_dir is not None: #plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") - return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary, comb_metrics, comb_confusion_matrix_summary + #return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary, comb_metrics, comb_confusion_matrix_summary + return metrics, conf_mat_summaries diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index 79520d4..6fe1a19 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -63,9 +63,11 @@ def train(model, args): yaml.dump(train_evals_by_epoch, f) if use_val: - eval_scores, rev_eval_scores, comb_eval_scores = val_epoch(model, t, val_dataloader, args) + #eval_scores, rev_eval_scores, comb_eval_scores = val_epoch(model, t, val_dataloader, args) + eval_scores = val_epoch(model, t, val_dataloader, args) + #for pt,pt_es in eval_scores.items(): # TODO: maybe plot rev-evals - val_evals.append(comb_eval_scores.copy()) + val_evals.append(eval_scores['comb'].copy()) plot_eval(train_evals, learning_rates, args, val_evals=val_evals) val_evals_by_epoch = {i : e for i, e in enumerate(val_evals)} @@ -77,7 +79,7 @@ def train(model, args): scheduler.step() if use_val and args.early_stopping: - current_f1 = comb_eval_scores['f1'] + current_f1 = eval_scores['comb']['f1'] if current_f1 > best_f1: print('found new best model') best_f1 = current_f1 @@ -229,25 +231,33 @@ def val_epoch(model, t, dataloader, args): model.eval() manifest = 
predict_and_generate_manifest(model, dataloader, args, verbose = False) - e, _, rev_e, _, comb_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) - - evals = {k:[] for k in ['precision','recall','f1']} - rev_evals = {k:[] for k in ['precision','recall','f1']} - comb_evals = {k:[] for k in ['precision','recall','f1']} - for k in ['precision','recall','f1']: - for l in args.label_set: - m = e['summary'][l][k] - rev_m = rev_e['summary'][l][k] - comb_m = comb_e['summary'][l][k] - evals[k].append(m) - rev_evals[k].append(rev_m) - comb_evals[k].append(comb_m) - evals[k] = float(np.mean(evals[k])) - rev_evals[k] = float(np.mean(rev_evals[k])) - comb_evals[k] = float(np.mean(comb_evals[k])) - - print(f"Epoch {t} | val@{args.model_selection_iou}IoU: prec: {evals['precision']:1.3f} rec: {evals['recall']:1.3f} F1: {evals['f1']:1.3f} revprec: {rev_evals['precision']:1.3f} revrec: {rev_evals['recall']:1.3f} revF1: {rev_evals['f1']:1.3f} combprec: {comb_evals['precision']:1.3f} combrec: {comb_evals['recall']:1.3f} combF1: {comb_evals['f1']:1.3f}") - return evals, rev_evals, comb_evals + #e, _, rev_e, _, comb_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) + e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) + + print(f"Epoch {t} | val@{args.model_selection_iou}IoU:") + evals = {} + for pt in e.keys(): + evals[pt] = {k:[] for k in ['precision','recall','f1']} + #evals = {k:[] for k in ['precision','recall','f1']} + #rev_evals = {k:[] for k in ['precision','recall','f1']} + #comb_evals = {k:[] for k in ['precision','recall','f1']} + for k in ['precision','recall','f1']: + for l in args.label_set: + m = e[pt]['summary'][l][k] + #rev_m = rev_e['summary'][l][k] + #comb_m = comb_e['summary'][l][k] + evals[pt][k].append(m) + #rev_evals[k].append(rev_m) + #comb_evals[k].append(comb_m) + evals[pt][k] = float(np.mean(evals[pt][k])) + #rev_evals[k] = float(np.mean(rev_evals[k])) + #comb_evals[k] = float(np.mean(comb_evals[k])) + + print(f"{pt}prec: {evals[pt]['precision']:1.3f} {pt}rec: {evals[pt]['recall']:1.3f} {pt}F1: {evals[pt]['f1']:1.3f}", end=' ') + #revprec: {rev_evals['precision']:1.3f} revrec: {rev_evals['recall']:1.3f} revF1: {rev_evals['f1']:1.3f} combprec: {comb_evals['precision']:1.3f} combrec: {comb_evals['recall']:1.3f} combF1: {comb_evals['f1']:1.3f}") + #return evals, rev_evals, comb_evals + print() + return evals def modified_focal_loss(pred, gt, pos_loss_weight = 1): # Modified from https://github.com/xingyizhou/CenterNet/blob/2b7692c377c6686fb35e473dac2de6105eed62c6/src/lib/models/losses.py diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index 0a40039..62e87c7 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -38,13 +38,13 @@ def train_model(args): manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) + class_threshes = [0] if len(args.label_set)==1 else [0.0, 0.5, 0.95] for iou in [0.2, 0.5, 0.8]: - for class_threshold in [0.0, 0.5, 0.95]: - metrics, conf_mat, rev_metrics, rev_conf_mat, comb_metrics, comb_conf_mat = 
evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) + for class_threshold in class_threshes: + metrics, conf_mats = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) print(f'IOU: {iou} class_thresh: {class_threshold}') - print('Fwd:', metrics['summary']) - print('Bck:', rev_metrics['summary']) - print('Comb:', comb_metrics['summary'], '\n') + for pred_type in metrics.keys(): + print(f'{pred_type}:', {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()}) if __name__ == "__main__": train_model(sys.argv[1:]) From 84994c43e9f411566794b2966669ad7d60545b26 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Fri, 19 Apr 2024 19:42:30 +0100 Subject: [PATCH 05/11] simple grid search on MT --- MT-grid-search.sh | 12 ++++++++++++ read_grid_search.py | 26 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 MT-grid-search.sh create mode 100644 read_grid_search.py diff --git a/MT-grid-search.sh b/MT-grid-search.sh new file mode 100644 index 0000000..b8a95cd --- /dev/null +++ b/MT-grid-search.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +for detthresh in 0.55; do +#for detthresh in 0.4; do + for combiouthresh in 0.5 0.55 0.6; do + #for combiouthresh in 0.4; do + for combdiscardthresh in 0.8 0.85 0.9; do + combdiscardthresh2=$(echo ${combdiscardthresh}-0.075 | bc -l) + (trap 'kill 0' SIGINT; python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=bidirectional-${detthresh}-${combiouthresh}-${combdiscardthresh} --lr=.00005 --batch-size=4 --n-epochs 20 --detection-threshold ${detthresh} --comb-iou-threshold ${combiouthresh} --comb-discard-threshold ${combdiscardthresh} & python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=bidirectional-${detthresh}-${combiouthresh}-${combdiscardthresh2} --lr=.00005 --batch-size=4 --n-epochs 20 --detection-threshold ${detthresh} --comb-iou-threshold ${combiouthresh} --comb-discard-threshold ${combdiscardthresh2} & wait) + done + done +done diff --git a/read_grid_search.py b/read_grid_search.py new file mode 100644 index 0000000..de3001e --- /dev/null +++ b/read_grid_search.py @@ -0,0 +1,26 @@ +import yaml +from os.path import join +import os +import pandas as pd + + +all_results = {} +for detthresh in (0.4, 0.55, 0.7, 0.85): + all_results[detthresh] = {} + for combiouthresh in (0.4, 0.55, 0.7, 0.85): + all_results[detthresh][combiouthresh] = {} + for combdiscardthresh in (0.4, 0.55, 0.7, 0.85): + all_results[detthresh][combiouthresh][combdiscardthresh] = {} + resdir = f'projects/MT_experiment/bidirectional-{detthresh}-{combiouthresh}-{combdiscardthresh}/test_results' + if not os.path.exists(resdir): + continue + results = {} + for iouf1 in (2,5,8): + with open(join(resdir, f'metrics_iou_0.{iouf1}_class_threshold_0.yaml')) as f: + exp_results = yaml.safe_load(f) + for pred_type in ('fwd','bck','comb','match'): + results[f'testiou{iouf1}-{pred_type}'] = exp_results[pred_type]['macro']['f1'] + all_results[detthresh][combiouthresh][combdiscardthresh] = results + +breakpoint() + From d7b5b5c9ec3f345237067cd6768b842785b9e34c Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Fri, 19 Apr 2024 19:44:04 +0100 Subject: [PATCH 06/11] get running on powdermill, refactor to use threshes in evaluate manifest only --- 
datasets/powdermill/process_powdermill.py | 49 ++++----- voxaboxen/evaluation/evaluation.py | 116 +++++++++------------- voxaboxen/project/project_setup.py | 19 ++-- voxaboxen/training/params.py | 5 +- voxaboxen/training/train.py | 35 ++----- voxaboxen/training/train_model.py | 28 ++++-- 6 files changed, 115 insertions(+), 137 deletions(-) diff --git a/datasets/powdermill/process_powdermill.py b/datasets/powdermill/process_powdermill.py index 2a52b7d..ed47e56 100644 --- a/datasets/powdermill/process_powdermill.py +++ b/datasets/powdermill/process_powdermill.py @@ -11,86 +11,86 @@ def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') raw_annot_dir = os.path.join(raw_data_dir, 'selection_tables') audio_dir = os.path.join(cwd, 'soundscape_data') - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') for d in [formatted_data_dir, formatted_annot_dir]: if not os.path.exists(d): os.makedirs(d) - + raw_annotations_fps = sorted(glob(os.path.join(cwd, 'raw', 'selection_tables', '*.txt'))) - + train_proportion = 0.6 val_proportion = 0.2 - + train_audio_fps = [] val_audio_fps = [] test_audio_fps = [] - + for i in range(1,5): audio_fps = sorted(glob(os.path.join(audio_dir, f"Recording_{i}_*"))) n_train = int(train_proportion * len(audio_fps)) n_val = int(val_proportion * len(audio_fps)) - + train_audio_fps.extend(audio_fps[:n_train]) val_audio_fps.extend(audio_fps[n_train:n_train+n_val]) test_audio_fps.extend(audio_fps[n_train+n_val:]) - - + + train_fns = [os.path.basename(x).split('.')[0] for x in train_audio_fps] val_fns = [os.path.basename(x).split('.')[0] for x in val_audio_fps] test_fns = [os.path.basename(x).split('.')[0] for x in test_audio_fps] - + train_annot_fps = [] val_annot_fps = [] test_annot_fps = [] - + for fn, audio_fp in zip(train_fns, train_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) train_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(val_fns, val_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) val_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(test_fns, test_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) test_annot_fps.append(annot_fp) - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) @@ 
-100,6 +100,7 @@ def main(): test_info_df = pd.DataFrame({'fn' : test_fns, 'audio_fp' : test_audio_fps, 'selection_table_fp' : test_annot_fps}) test_info_fp = os.path.join(formatted_data_dir, 'test_info.csv') test_info_df.to_csv(test_info_fp, index = False) + breakpoint() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index cb09eac..d0d8e81 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -414,8 +414,8 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fns = [] fwd_predictions_fps = [] bck_predictions_fps = [] - comb_predictions_fps = [] - match_predictions_fps = [] + #comb_predictions_fps = [] + #match_predictions_fps = [] annotations_fps = [] for fn in dataloader_dict: @@ -423,21 +423,53 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fwd_predictions_fp = export_to_selection_table(fwd_detections, fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose) bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose) - comb_predictions_fp, match_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn, discard_threshold=args.comb_threshold) - annotations_fp = dataloader_dict[fn].dataset.annot_fp fns.append(fn) fwd_predictions_fps.append(fwd_predictions_fp) bck_predictions_fps.append(bck_predictions_fp) - comb_predictions_fps.append(comb_predictions_fp) - match_predictions_fps.append(match_predictions_fp) annotations_fps.append(annotations_fp) - manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'match_predictions_fp' : match_predictions_fps, 'annotations_fp' : annotations_fps}) + #manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'match_predictions_fp' : match_predictions_fps, 'annotations_fp' : annotations_fps}) + manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'annotations_fp' : annotations_fps}) return manifest -def combine_fwd_bck_preds(target_dir, fn, discard_threshold): +def evaluate_based_on_manifest(manifest, args, output_dir, iou, class_threshold, comb_discard_threshold): + pred_types = ('fwd', 'bck', 'comb', 'match') + metrics = {p:{} for p in pred_types} + conf_mats = {p:{} for p in pred_types} + conf_mat_labels = {} + + for i, row in manifest.iterrows(): + fn = row['filename'] + annots_fp = row['annotations_fp'] + row['comb_predictions_fp'], row['match_predictions_fp'] = combine_fwd_bck_preds(args.experiment_output_dir, fn, comb_iou_threshold=args.comb_iou_threshold, comb_discard_threshold=comb_discard_threshold) + + for pred_type in pred_types: + preds_fp = row[f'{pred_type}_predictions_fp'] + metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) + conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) + + if output_dir is not None: + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + # summarize and save metrics + conf_mat_summaries = {} + for pred_type in ('fwd', 'bck', 'comb', 'match'): + summary = 
summarize_metrics(metrics[pred_type]) + metrics[pred_type]['summary'] = summary + metrics[pred_type]['macro'] = macro_metrics(summary) + conf_mat_summaries[pred_type], confusion_matrix_labels = summarize_confusion_matrix(conf_mats[pred_type], conf_mat_labels[pred_type]) + if output_dir is not None: + metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') + with open(metrics_fp, 'w') as f: + yaml.dump(metrics, f) + + # summarize and save confusion matrix + return metrics, conf_mat_summaries + +def combine_fwd_bck_preds(target_dir, fn, comb_iou_threshold, comb_discard_threshold): fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-fwd.txt') bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-bck.txt') comb_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-comb.txt') @@ -448,8 +480,7 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): c = Clip() c.load_annotations(fwd_preds_fp) c.load_predictions(bck_preds_fp) - c.compute_matching(IoU_minimum=0.5) - #comb_preds = fwd_preds.copy() + c.compute_matching(IoU_minimum=comb_iou_threshold) match_preds_list = [] for fp, bp in c.matching: match_pred = fwd_preds.loc[fp].copy() @@ -465,9 +496,11 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): bck_matched_idxs = [m[1] for m in c.matching] fwd_unmatched = select_from_neg_idxs(fwd_preds, fwd_matched_idxs) bck_unmatched = select_from_neg_idxs(bck_preds, bck_matched_idxs) - comb_preds = pd.concat([match_preds, fwd_unmatched, bck_unmatched]) + to_concat = [x for x in [match_preds, fwd_unmatched, bck_unmatched] if x.shape[0]>0] + comb_preds = pd.concat(to_concat) if len(to_concat)>0 else fwd_preds assert len(comb_preds) == len(fwd_preds) + len(bck_preds) - len(c.matching) - comb_preds = comb_preds.loc[comb_preds['Detection Prob']>discard_threshold] + comb_preds = comb_preds.loc[comb_preds['Detection Prob']>comb_discard_threshold] + #print(f'Using combdiscardthresh {args.comb_discard_threshold} and comb_preds has shape {comb_preds.shape}') comb_preds.sort_values('Begin Time (s)') comb_preds.index = list(range(len(comb_preds))) @@ -478,62 +511,3 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): def select_from_neg_idxs(df, neg_idxs): bool_mask = [i not in neg_idxs for i in range(len(df))] return df.loc[bool_mask] - -def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0): - pred_types = ('fwd', 'bck', 'comb', 'match') - metrics = {p:{} for p in pred_types} - conf_mats = {p:{} for p in pred_types} - #conf_mat_labels = {p:{} for p in pred_types} - conf_mat_labels = {} - #rev_metrics = {} - #rev_confusion_matrix = {} - #comb_metrics = {} - #comb_confusion_matrix = {} - - for i, row in manifest.iterrows(): - fn = row['filename'] - annots_fp = row['annotations_fp'] - for pred_type in pred_types: - preds_fp = row[f'{pred_type}_predictions_fp'] - metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) - conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) - #rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) - #rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) - #comb_metrics[fn] = get_metrics(comb_predictions_fp, annotations_fp, args, iou, class_threshold) - #comb_confusion_matrix[fn], comb_confusion_matrix_labels = 
get_confusion_matrix(comb_predictions_fp, annotations_fp, args, iou, class_threshold) - - if output_dir is not None: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - # summarize and save metrics - conf_mat_summaries = {} - for pred_type in ('fwd', 'bck', 'comb', 'match'): - summary = summarize_metrics(metrics[pred_type]) - metrics[pred_type]['summary'] = summary - metrics[pred_type]['macro'] = macro_metrics(summary) - conf_mat_summaries[pred_type], confusion_matrix_labels = summarize_confusion_matrix(conf_mats[pred_type], conf_mat_labels[pred_type]) - #macro = macro_metrics(summary) - #metrics['macro'] = macro - #rev_summary = summarize_metrics(rev_metrics) - #rev_metrics['summary'] = rev_summary - #rev_macro = macro_metrics(rev_summary) - #rev_metrics['macro'] = rev_macro - #comb_summary = summarize_metrics(comb_metrics) - #comb_metrics['summary'] = comb_summary - #comb_macro = macro_metrics(comb_summary) - #comb_metrics['macro'] = comb_macro - if output_dir is not None: - metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') - with open(metrics_fp, 'w') as f: - yaml.dump(metrics, f) - - # summarize and save confusion matrix - #confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) - #rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) - #comb_confusion_matrix_summary, comb_confusion_matrix_labels = summarize_confusion_matrix(comb_confusion_matrix, comb_confusion_matrix_labels) - #if output_dir is not None: - #plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") - - #return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary, comb_metrics, comb_confusion_matrix_summary - return metrics, conf_mat_summaries diff --git a/voxaboxen/project/project_setup.py b/voxaboxen/project/project_setup.py index 09f9a8d..cd38721 100644 --- a/voxaboxen/project/project_setup.py +++ b/voxaboxen/project/project_setup.py @@ -7,37 +7,38 @@ def project_setup(args): args = parse_project_args(args) - + if not os.path.exists(args.project_dir): os.makedirs(args.project_dir) - + all_annots = [] for info_fp in [args.train_info_fp, args.val_info_fp, args.test_info_fp]: if info_fp is None: continue - + info = pd.read_csv(info_fp) annot_fps = list(info['selection_table_fp']) - + for annot_fp in annot_fps: if annot_fp != "None": selection_table = pd.read_csv(annot_fp, delimiter = '\t') annots = list(selection_table['Annotation']) all_annots.extend(annots) - + label_set = sorted(set(all_annots)) label_mapping = {x : x for x in label_set} + breakpoint() label_mapping['Unknown'] = 'Unknown' unknown_label = 'Unknown' - + if unknown_label in label_set: label_set.remove(unknown_label) - + setattr(args, "label_set", label_set) setattr(args, "label_mapping", label_mapping) setattr(args, "unknown_label", unknown_label) - + save_params(args) if __name__ == "__main__": - project_setup(sys.argv[1:]) \ No newline at end of file + project_setup(sys.argv[1:]) diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index a4775df..1e580db 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -13,6 +13,7 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--name', type = str, required=True) parser.add_argument('--seed', type=int, default=0) 
parser.add_argument('--is_test', '-t', action='store_true') + parser.add_argument('--overwrite', action='store_true') # Data parser.add_argument('--project-config-fp', type = str, required=True) @@ -32,7 +33,9 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--previous-checkpoint-fp', type=str, default=None, help="path to checkpoint of previously trained detection model") parser.add_argument('--aves-url', type=str, default = "https://storage.googleapis.com/esp-public-files/ported_aves/aves-base-bio.torchaudio.pt") parser.add_argument('--stereo', action='store_true', help="If passed, will process stereo data as stereo") - parser.add_argument('--comb-threshold', type=float, default=0.75, help="discard combined detections whose prob is below this threshold") + parser.add_argument('--comb-discard-threshold', type=float, default=0.75, help="discard combined detections whose prob is below this threshold") + parser.add_argument('--comb-iou-threshold', type=float, default=0.5, help="minimum IoU for a forward and a backward detection to be matched when combining") + parser.add_argument('--reload-from', type=str, help="name of an experiment in project_dir whose saved model.pt is loaded before training") # Training parser.add_argument('--batch-size', type=int, default=32) diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index 6fe1a19..f4cfc28 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -63,10 +63,8 @@ def train(model, args): yaml.dump(train_evals_by_epoch, f) if use_val: - #eval_scores, rev_eval_scores, comb_eval_scores = val_epoch(model, t, val_dataloader, args) eval_scores = val_epoch(model, t, val_dataloader, args) - #for pt,pt_es in eval_scores.items(): - # TODO: maybe plot rev-evals + # TODO: maybe plot evals for other pred_types val_evals.append(eval_scores['comb'].copy()) plot_eval(train_evals, learning_rates, args, val_evals=val_evals) @@ -125,7 +123,7 @@ def train(model, args): return model -def lf(dets, det_preds, regs, reg_preds, y, y_preds, args, reg_loss_fn): +def lf(dets, det_preds, regs, reg_preds, y, y_preds, args, reg_loss_fn, class_loss_fn): end_mask_perc = args.end_mask_perc end_mask_dur = int(det_preds.size(1)*end_mask_perc) @@ -135,13 +133,15 @@ def lf(dets, det_preds, regs, reg_preds, y, y_preds, args, reg_loss_fn): reg_preds_clipped = reg_preds[:,end_mask_dur:-end_mask_dur] regs_clipped = regs[:,end_mask_dur:-end_mask_dur] - #y_preds_clipped = y_preds[:,end_mask_dur:-end_mask_dur,:] y_clipped = y[:,end_mask_dur:-end_mask_dur,:] detection_loss = modified_focal_loss(det_preds_clipped, dets_clipped, pos_loss_weight=args.pos_loss_weight) reg_loss = reg_loss_fn(reg_preds_clipped, regs_clipped, dets_clipped, y_clipped) - #class_loss = class_loss_fn(y_preds_clipped, y_clipped, dets_clipped) - class_loss = torch.tensor(0) + if len(args.label_set)==1: + class_loss = torch.tensor(0) + else: + y_preds_clipped = y_preds[:,end_mask_dur:-end_mask_dur,:] + class_loss = class_loss_fn(y_preds_clipped, y_clipped, dets_clipped) return detection_loss, reg_loss, class_loss def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, optimizer, args): @@ -174,8 +174,8 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss # We mask out loss from each end of the clip, so the model isn't forced to learn to detect events that are partially cut off.
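# e.g. with end_mask_perc = 0.1 and 400 prediction frames, lf excludes the first and last 40 frames from the detection, regression, and classification losses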
# This does not affect inference, because during inference we overlap clips at 50% - detection_loss, reg_loss, class_loss = lf(d, probs, r, regression, class_logits, y, args=args, reg_loss_fn=reg_loss_fn) - rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_class_logits, rev_y, args=args, reg_loss_fn=reg_loss_fn) + detection_loss, reg_loss, class_loss = lf(d, probs, r, regression, y, class_logits, args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) + rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_y, rev_class_logits, args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) normal_loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss loss = (normal_loss + rev_loss)/2 @@ -209,9 +209,6 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss rev_regression_losses.append(args.lamb * rev_reg_loss.item()) rev_class_losses.append(args.rho * rev_class_loss.item()) - #if i > 150: - #breakpoint() - # Backpropagation optimizer.zero_grad() loss.backward() @@ -231,31 +228,19 @@ def val_epoch(model, t, dataloader, args): model.eval() manifest = predict_and_generate_manifest(model, dataloader, args, verbose = False) - #e, _, rev_e, _, comb_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) - e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) + e, _ = evaluate_based_on_manifest(manifest, args, output_dir=os.path.join(args.experiment_dir, 'val_results'), iou=args.model_selection_iou, class_threshold=args.model_selection_class_threshold, comb_discard_threshold=args.comb_discard_threshold) print(f"Epoch {t} | val@{args.model_selection_iou}IoU:") evals = {} for pt in e.keys(): evals[pt] = {k:[] for k in ['precision','recall','f1']} - #evals = {k:[] for k in ['precision','recall','f1']} - #rev_evals = {k:[] for k in ['precision','recall','f1']} - #comb_evals = {k:[] for k in ['precision','recall','f1']} for k in ['precision','recall','f1']: for l in args.label_set: m = e[pt]['summary'][l][k] - #rev_m = rev_e['summary'][l][k] - #comb_m = comb_e['summary'][l][k] evals[pt][k].append(m) - #rev_evals[k].append(rev_m) - #comb_evals[k].append(comb_m) evals[pt][k] = float(np.mean(evals[pt][k])) - #rev_evals[k] = float(np.mean(rev_evals[k])) - #comb_evals[k] = float(np.mean(comb_evals[k])) print(f"{pt}prec: {evals[pt]['precision']:1.3f} {pt}rec: {evals[pt]['recall']:1.3f} {pt}F1: {evals[pt]['f1']:1.3f}", end=' ') - #revprec: {rev_evals['precision']:1.3f} revrec: {rev_evals['recall']:1.3f} revF1: {rev_evals['f1']:1.3f} combprec: {comb_evals['precision']:1.3f} combrec: {comb_evals['recall']:1.3f} combF1: {comb_evals['f1']:1.3f}") - #return evals, rev_evals, comb_evals print() return evals diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index 62e87c7..be8ee8c 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -1,4 +1,6 @@ +import pandas as pd from voxaboxen.data.data import get_test_dataloader +import torch from voxaboxen.model.model import DetectionModel, DetectionModelStereo from voxaboxen.training.train 
import train from voxaboxen.training.params import parse_args, set_seed, save_params @@ -16,7 +18,8 @@ def train_model(args): experiment_dir = os.path.join(args.project_dir, args.name) setattr(args, 'experiment_dir', str(experiment_dir)) - if not os.path.exists(args.experiment_dir): + if os.path.exists(args.experiment_dir) and not args.overwrite: + sys.exit('experiment already exists with this name') os.makedirs(args.experiment_dir) experiment_output_dir = os.path.join(experiment_dir, "outputs") @@ -30,21 +33,32 @@ def train_model(args): else: model = DetectionModel(args) + if args.reload_from is not None: + #model.load_state_dict(os.path.join(args.experiment_dir), 'model.pt') + checkpoint = torch.load(os.path.join(args.project_dir, args.reload_from, 'model.pt')) + model.load_state_dict(checkpoint['model_state_dict']) + ## Training - trained_model = train(model, args) + if args.n_epochs == 0: + trained_model = model + else: + trained_model = train(model, args) ## Evaluation test_dataloader = get_test_dataloader(args) manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) - class_threshes = [0] if len(args.label_set)==1 else [0.0, 0.5, 0.95] + #class_threshes = [0] if len(args.label_set)==1 else [0.0, 0.5, 0.95] + class_threshes = [0.0, 0.5, 0.95] for iou in [0.2, 0.5, 0.8]: for class_threshold in class_threshes: - metrics, conf_mats = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) - print(f'IOU: {iou} class_thresh: {class_threshold}') - for pred_type in metrics.keys(): - print(f'{pred_type}:', {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()}) + for comb_discard_thresh in [0.85]: + metrics, conf_mats = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=class_threshold, comb_discard_threshold=comb_discard_thresh) + print(f'IOU: {iou} class_thresh: {class_threshold} Comb discard threshold: {comb_discard_thresh}') + for pred_type in metrics.keys(): + to_print = {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()} if len(args.label_set)==1 else dict(pd.DataFrame(metrics[pred_type]['summary']).mean(axis=1).round(4)) + print(f'{pred_type}:', to_print) if __name__ == "__main__": train_model(sys.argv[1:]) From b587878d494bb94f8d1161759d87a37daf878ab9 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Wed, 24 Apr 2024 09:31:49 +0100 Subject: [PATCH 07/11] fit comb-discard threshold on the val set after training --- datasets/BV/process_BV.py | 54 +++++++++++++++---------------- datasets/hawaii/process_hawaii.py | 41 +++++++++++------------ voxaboxen/training/train_model.py | 37 ++++++++++++++------- 3 files changed, 73 insertions(+), 59 deletions(-) diff --git a/datasets/BV/process_BV.py b/datasets/BV/process_BV.py index edcf1bf..e0b4e36 100644 --- a/datasets/BV/process_BV.py +++ b/datasets/BV/process_BV.py @@ -16,14 +16,14 @@ def process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion, audio_dur_sec = np.floor(audio_dur_samples / sr) train_audio_dur_samples = int(audio_dur_samples * train_proportion) train_audio_dur_sec = train_audio_dur_samples / sr - + val_audio_dur_samples = int(audio_dur_samples * val_proportion) val_audio_dur_sec = val_audio_dur_samples / sr - + train_audio = audio[:train_audio_dur_samples] val_audio = 
audio[train_audio_dur_samples:train_audio_dur_samples+val_audio_dur_samples] test_audio = audio[train_audio_dur_samples+val_audio_dur_samples:] - + ys = [] for i, row in annot.iterrows(): y = 'voc' @@ -33,30 +33,30 @@ def process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion, # if row[label] == 'POS': # y = label ys.append(y) - + begin_time = list(annot['Starttime']) end_time = list(annot['Endtime']) - + selection_table = pd.DataFrame({'Begin Time (s)' : begin_time, 'End Time (s)' : end_time, 'Annotation' : ys, 'Low Freq (Hz)' : [low_hz for x in begin_time], 'High Freq (Hz)' : [high_hz for x in begin_time]}).drop_duplicates() - + train_selection_table = selection_table[selection_table['End Time (s)'] < train_audio_dur_sec].copy() val_selection_table = selection_table[(selection_table['End Time (s)'] >= train_audio_dur_sec) & (selection_table['End Time (s)'] < train_audio_dur_sec + val_audio_dur_sec)].copy() val_selection_table['Begin Time (s)'] = val_selection_table['Begin Time (s)'] - train_audio_dur_sec val_selection_table['Begin Time (s)'] = val_selection_table['Begin Time (s)'].map(lambda x : max(x, 0)) val_selection_table['End Time (s)'] = val_selection_table['End Time (s)'] - train_audio_dur_sec - + test_selection_table = selection_table[selection_table['Begin Time (s)'] >= train_audio_dur_sec + val_audio_dur_sec].copy() test_selection_table['Begin Time (s)'] = test_selection_table['Begin Time (s)'] - (train_audio_dur_sec + val_audio_dur_sec) test_selection_table['Begin Time (s)'] = test_selection_table['Begin Time (s)'].map(lambda x : max(x, 0)) test_selection_table['End Time (s)'] = test_selection_table['End Time (s)'] - (train_audio_dur_sec + val_audio_dur_sec) - + return train_selection_table, train_audio, val_selection_table, val_audio, test_selection_table, test_audio -def main(): +def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_audio_dir = os.path.join(formatted_data_dir, 'audio') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') @@ -64,65 +64,65 @@ def main(): if not os.path.exists(d): os.makedirs(d) - train_proportion = 0.6 + train_proportion = 0.6 val_proportion = 0.2 - + annotation_fns = sorted(glob(os.path.join(raw_data_dir, '*.csv'))) annotation_fns = [os.path.basename(x) for x in annotation_fns] audio_fns = sorted(glob(os.path.join(raw_data_dir, '*.wav'))) audio_fns = [os.path.basename(x) for x in audio_fns] - + train_fns = [] train_audio_fps = [] train_annot_fps = [] - + val_fns = [] val_audio_fps = [] val_annot_fps = [] - + test_fns = [] test_audio_fps = [] test_annot_fps = [] - + for annot_fn, audio_fn in tqdm.tqdm(zip(annotation_fns, audio_fns)): fn = annot_fn.split('.')[0] train_fns.append(f"{fn}_train") val_fns.append(f"{fn}_val") test_fns.append(f"{fn}_test") - + annot_fp = os.path.join(raw_data_dir, annot_fn) audio_fp = os.path.join(raw_data_dir, audio_fn) - + annot = pd.read_csv(annot_fp) audio, sr = sf.read(audio_fp) - + train_selection_table, train_audio, val_selection_table, val_audio, test_selection_table, test_audio = process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion) - + train_selection_table_fn = f"{annot_fn.split('.')[0]}_train.txt" train_selection_table_fp = os.path.join(formatted_annot_dir, train_selection_table_fn) train_selection_table.to_csv(train_selection_table_fp, sep = '\t', index = False) train_annot_fps.append(train_selection_table_fp) - + train_audio_fn = 
f"{audio_fn.split('.')[0]}_train.wav" train_audio_fp = os.path.join(formatted_audio_dir, train_audio_fn) sf.write(train_audio_fp, train_audio, sr) train_audio_fps.append(train_audio_fp) - + val_selection_table_fn = f"{annot_fn.split('.')[0]}_val.txt" val_selection_table_fp = os.path.join(formatted_annot_dir, val_selection_table_fn) val_selection_table.to_csv(val_selection_table_fp, sep = '\t', index = False) val_annot_fps.append(val_selection_table_fp) - + val_audio_fn = f"{audio_fn.split('.')[0]}_val.wav" val_audio_fp = os.path.join(formatted_audio_dir, val_audio_fn) sf.write(val_audio_fp, val_audio, sr) val_audio_fps.append(val_audio_fp) - + test_selection_table_fn = f"{annot_fn.split('.')[0]}_test.txt" test_selection_table_fp = os.path.join(formatted_annot_dir, test_selection_table_fn) test_selection_table.to_csv(test_selection_table_fp, sep = '\t', index = False) test_annot_fps.append(test_selection_table_fp) - + test_audio_fn = f"{audio_fn.split('.')[0]}_test.wav" test_audio_fp = os.path.join(formatted_audio_dir, test_audio_fn) sf.write(test_audio_fp, test_audio, sr) @@ -133,7 +133,7 @@ def main(): test_audio = None val_audio = None train_audio = None - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) diff --git a/datasets/hawaii/process_hawaii.py b/datasets/hawaii/process_hawaii.py index 14060b8..d20b408 100644 --- a/datasets/hawaii/process_hawaii.py +++ b/datasets/hawaii/process_hawaii.py @@ -14,78 +14,79 @@ def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') audio_dir = os.path.join(cwd, 'soundscape_data') - + raw_annotations_fp = os.path.join(cwd, 'raw', 'annotations.csv') raw_annot_df = pd.read_csv(raw_annotations_fp) raw_annot_df['Annotation'] = raw_annot_df['Species eBird Code'] raw_annot_df = raw_annot_df.drop('Species eBird Code', axis=1) - + raw_annot_df['Begin Time (s)'] = raw_annot_df['Start Time (s)'] raw_annot_df = raw_annot_df.drop('Start Time (s)', axis=1) - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') for d in [formatted_data_dir, formatted_annot_dir]: if not os.path.exists(d): os.makedirs(d) - + train_proportion = 0.6 val_proportion = 0.2 - + train_audio_fps = [] val_audio_fps = [] test_audio_fps = [] - + for i in range(1,5): audio_fps = sorted(glob(os.path.join(audio_dir, f"*_S0{i}_*.{file_extension}"))) + #audio_fps = sorted(glob(os.path.join(audio_dir, f"*Recording_{i}_*.{file_extension}"))) n_train = int(train_proportion * len(audio_fps)) n_val = int(val_proportion * len(audio_fps)) - + train_audio_fps.extend(audio_fps[:n_train]) val_audio_fps.extend(audio_fps[n_train:n_train+n_val]) test_audio_fps.extend(audio_fps[n_train+n_val:]) - + train_fns = [os.path.basename(x).split('.')[0] for x in train_audio_fps] val_fns = [os.path.basename(x).split('.')[0] for x in val_audio_fps] test_fns = [os.path.basename(x).split('.')[0] for x in test_audio_fps] - + train_annot_fps = [] val_annot_fps = [] test_annot_fps = [] - + for fn, audio_fp in zip(train_fns, train_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) 
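(For orientation: the selection tables written by these loops are tab-separated, Raven-style text files; a hypothetical example, with invented times and species codes, might look like:)

    Begin Time (s)	End Time (s)	Annotation
    12.81	14.05	hawama
    31.20	31.92	iiwi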
train_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(val_fns, val_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) val_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(test_fns, test_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) test_annot_fps.append(annot_fp) - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index be8ee8c..8a63436 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -1,5 +1,5 @@ import pandas as pd -from voxaboxen.data.data import get_test_dataloader +from voxaboxen.data.data import get_test_dataloader, get_val_dataloader import torch from voxaboxen.model.model import DetectionModel, DetectionModelStereo from voxaboxen.training.train import train @@ -10,7 +10,13 @@ import sys import os + +def print_metrics(metrics, just_one_label): + for pred_type in metrics.keys(): + to_print = {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()} if just_one_label else dict(pd.DataFrame(metrics[pred_type]['summary']).mean(axis=1).round(4)) + print(f'{pred_type}:', to_print) def train_model(args): + ## Setup args = parse_args(args) @@ -34,7 +40,6 @@ def train_model(args): model = DetectionModel(args) if args.reload_from is not None: - #model.load_state_dict(os.path.join(args.experiment_dir), 'model.pt') checkpoint = torch.load(os.path.join(args.project_dir, args.reload_from, 'model.pt')) model.load_state_dict(checkpoint['model_state_dict']) @@ -46,19 +51,27 @@ def train_model(args): ## Evaluation test_dataloader = get_test_dataloader(args) + val_dataloader = get_val_dataloader(args) + + val_manifest = predict_and_generate_manifest(trained_model, val_dataloader, args) - manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) + best_comb_discard_thresh = -1 + best_f1 = 0 + for comb_discard_thresh in [.3,.35,.4,.45,.5,.55,.6,.65,.75,.8,.85,.9]: + val_metrics, val_conf_mats = evaluate_based_on_manifest(val_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=0.5, class_threshold=0.5, comb_discard_threshold=comb_discard_thresh) + new_f1 = val_metrics['comb']['macro']['f1'] + if new_f1 > best_f1: + best_comb_discard_thresh = comb_discard_thresh + best_f1 = new_f1 + print(f'IOU: 0.5 class_thresh: 0.5 Comb discard threshold: {comb_discard_thresh}') + print_metrics(val_metrics, just_one_label=(len(args.label_set)==1)) - #class_threshes = [0] if len(args.label_set)==1 else [0.0, 0.5, 0.95] - class_threshes = [0.0, 0.5, 0.95] + test_manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) + print(f'Using thresh: {best_comb_discard_thresh}') for iou in [0.2, 0.5, 0.8]: - for class_threshold in class_threshes: - for comb_discard_thresh in [0.85]: - 
metrics, conf_mats = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=class_threshold, comb_discard_threshold=comb_discard_thresh) - print(f'IOU: {iou} class_thresh: {class_threshold} Comb discard threshold: {comb_discard_thresh}') - for pred_type in metrics.keys(): - to_print = {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()} if len(args.label_set)==1 else dict(pd.DataFrame(metrics[pred_type]['summary']).mean(axis=1).round(4)) - print(f'{pred_type}:', to_print) + test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=best_comb_discard_thresh) + print(f'Test with IOU{iou}') + print_metrics(test_metrics, just_one_label=(len(args.label_set)==1)) if __name__ == "__main__": train_model(sys.argv[1:]) From 84bcf19d0e94e73486a4ebb4df58bf1ab8d2250a Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Wed, 24 Apr 2024 09:58:45 +0100 Subject: [PATCH 08/11] clean up some old code --- voxaboxen/data/data.py | 3 +-- voxaboxen/evaluation/evaluation.py | 11 +++++------ voxaboxen/project/project_setup.py | 1 - voxaboxen/training/params.py | 8 ++++---- voxaboxen/training/train.py | 16 ---------------- 5 files changed, 10 insertions(+), 29 deletions(-) diff --git a/voxaboxen/data/data.py b/voxaboxen/data/data.py index 924ce86..0f360e8 100644 --- a/voxaboxen/data/data.py +++ b/voxaboxen/data/data.py @@ -238,8 +238,7 @@ def get_train_dataloader(args, random_seed_shift = 0): train_dataloader = DataLoader(train_dataset, batch_size=effective_batch_size, shuffle=True, - #num_workers=args.num_workers, - num_workers=0, + num_workers=args.num_workers, pin_memory=True, drop_last = True) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index d0d8e81..97e8082 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -222,7 +222,6 @@ def generate_features(model, single_clip_dataloader, args, verbose = True): return all_features.detach().cpu().numpy() -#def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, det_threshold=0.5, classif_threshold=0): def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, classif_threshold=0): if target_dir is None: @@ -414,8 +413,6 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fns = [] fwd_predictions_fps = [] bck_predictions_fps = [] - #comb_predictions_fps = [] - #match_predictions_fps = [] annotations_fps = [] for fn in dataloader_dict: @@ -430,7 +427,6 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): bck_predictions_fps.append(bck_predictions_fp) annotations_fps.append(annotations_fp) - #manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'match_predictions_fp' : match_predictions_fps, 'annotations_fp' : annotations_fps}) manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'annotations_fp' : annotations_fps}) return manifest @@ -487,11 +483,13 @@ def combine_fwd_bck_preds(target_dir, fn, comb_iou_threshold, comb_discard_thres bck_pred = bck_preds.iloc[bp] bp_end_time = bck_pred['End Time 
(s)'] match_pred['End Time (s)'] = bp_end_time + # Sorta like assuming forward and back predictions are independent, gives a high prob for the matched predictions match_pred['Detection Prob'] = 1 - (1-match_pred['Detection Prob'])*(1-bck_pred['Detection Prob']) match_preds_list.append(match_pred) match_preds = pd.DataFrame(match_preds_list, columns=fwd_preds.columns) - # Now include the union of all that weren't matched + + # Include the union of all predictions that weren't part of the matching fwd_matched_idxs = [m[0] for m in c.matching] bck_matched_idxs = [m[1] for m in c.matching] fwd_unmatched = select_from_neg_idxs(fwd_preds, fwd_matched_idxs) @@ -499,8 +497,9 @@ def combine_fwd_bck_preds(target_dir, fn, comb_iou_threshold, comb_discard_thres to_concat = [x for x in [match_preds, fwd_unmatched, bck_unmatched] if x.shape[0]>0] comb_preds = pd.concat(to_concat) if len(to_concat)>0 else fwd_preds assert len(comb_preds) == len(fwd_preds) + len(bck_preds) - len(c.matching) + + # Finally, keep only predictions above a threshold, this will include almost all matches comb_preds = comb_preds.loc[comb_preds['Detection Prob']>comb_discard_threshold] - #print(f'Using combdiscardthresh {args.comb_discard_threshold} and comb_preds has shape {comb_preds.shape}') comb_preds.sort_values('Begin Time (s)') comb_preds.index = list(range(len(comb_preds))) diff --git a/voxaboxen/project/project_setup.py b/voxaboxen/project/project_setup.py index cd38721..48b39d0 100644 --- a/voxaboxen/project/project_setup.py +++ b/voxaboxen/project/project_setup.py @@ -27,7 +27,6 @@ def project_setup(args): label_set = sorted(set(all_annots)) label_mapping = {x : x for x in label_set} - breakpoint() label_mapping['Unknown'] = 'Unknown' unknown_label = 'Unknown' diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index 1e580db..b4846b0 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -12,8 +12,8 @@ def parse_args(args,allow_unknown=False): # General parser.add_argument('--name', type = str, required=True) parser.add_argument('--seed', type=int, default=0) - parser.add_argument('--is_test', '-t', action='store_true') - parser.add_argument('--overwrite', action='store_true') + parser.add_argument('--is_test', '-t', action='store_true', help='run a quick version for testing') + parser.add_argument('--overwrite', action='store_true', help='overwrite an experiment of the same name, if it exists') # Data parser.add_argument('--project-config-fp', type = str, required=True) @@ -33,8 +33,8 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--previous-checkpoint-fp', type=str, default=None, help="path to checkpoint of previously trained detection model") parser.add_argument('--aves-url', type=str, default = "https://storage.googleapis.com/esp-public-files/ported_aves/aves-base-bio.torchaudio.pt") parser.add_argument('--stereo', action='store_true', help="If passed, will process stereo data as stereo") - parser.add_argument('--comb-discard-threshold', type=float, default=0.75, help="discard combined detections whose prob is below this threshold") - parser.add_argument('--comb-iou-threshold', type=float, default=0.5, help="discard combined detections whose prob is below this threshold") + parser.add_argument('--comb-discard-threshold', type=float, default=0.75, help="during evaluation, discard combined detections whose prob is below this threshold") + parser.add_argument('--comb-iou-threshold', type=float, default=0.5, help="minimum iou to match a forward and 
backward prediction") parser.add_argument('--reload-from', type=str) # Training diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index f4cfc28..d7e9db8 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -179,23 +179,7 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss normal_loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss loss = (normal_loss + rev_loss)/2 - #end_mask_perc = args.end_mask_perc - #end_mask_dur = int(probs.size(1)*end_mask_perc) - #d_clipped = d[:,end_mask_dur:-end_mask_dur] - #probs_clipped = probs[:,end_mask_dur:-end_mask_dur] - - #regression_clipped = regression[:,end_mask_dur:-end_mask_dur] - #r_clipped = r[:,end_mask_dur:-end_mask_dur] - - #class_logits_clipped = class_logits[:,end_mask_dur:-end_mask_dur,:] - #y_clipped = y[:,end_mask_dur:-end_mask_dur,:] - - #detection_loss = detection_loss_fn(probs_clipped, d_clipped, pos_loss_weight = args.pos_loss_weight) - #reg_loss = reg_loss_fn(regression_clipped, r_clipped, d_clipped, y_clipped) - #class_loss = class_loss_fn(class_logits_clipped, y_clipped, d_clipped) - - #loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss train_loss += loss.item() rev_train_loss += rev_loss.item() normal_train_loss += normal_loss.item() From 0a12448538e6a198d045842136c7a580d0a65d0a Mon Sep 17 00:00:00 2001 From: Lou1sM Date: Wed, 1 May 2024 13:42:31 +0100 Subject: [PATCH 09/11] remove unused bash scripts and leftover breakpoints --- MT-grid-search.sh | 12 ----- datasets/BV/process_BV.py | 54 +++++++++++------------ datasets/hawaii/process_hawaii.py | 41 +++++++++-------- datasets/powdermill/process_powdermill.py | 49 ++++++++++---------- read_grid_search.py | 26 ----------- run.sh | 1 - voxaboxen/training/train_model.py | 2 +- 7 files changed, 72 insertions(+), 113 deletions(-) delete mode 100644 MT-grid-search.sh delete mode 100644 read_grid_search.py delete mode 100644 run.sh diff --git a/MT-grid-search.sh b/MT-grid-search.sh deleted file mode 100644 index b8a95cd..0000000 --- a/MT-grid-search.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh - -for detthresh in 0.55; do -#for detthresh in 0.4; do - for combiouthresh in 0.5 0.55 0.6; do - #for combiouthresh in 0.4; do - for combdiscardthresh in 0.8 0.85 0.9; do - combdiscardthresh2=$(echo ${combdiscardthresh}-0.075 | bc -l) - (trap 'kill 0' SIGINT; python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=bidirectional-${detthresh}-${combiouthresh}-${combdiscardthresh} --lr=.00005 --batch-size=4 --n-epochs 20 --detection-threshold ${detthresh} --comb-iou-threshold ${combiouthresh} --comb-discard-threshold ${combdiscardthresh} & python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=bidirectional-${detthresh}-${combiouthresh}-${combdiscardthresh2} --lr=.00005 --batch-size=4 --n-epochs 20 --detection-threshold ${detthresh} --comb-iou-threshold ${combiouthresh} --comb-discard-threshold ${combdiscardthresh2} & wait) - done - done -done diff --git a/datasets/BV/process_BV.py b/datasets/BV/process_BV.py index e0b4e36..edcf1bf 100644 --- a/datasets/BV/process_BV.py +++ b/datasets/BV/process_BV.py @@ -16,14 +16,14 @@ def process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion, audio_dur_sec = np.floor(audio_dur_samples / sr) train_audio_dur_samples = int(audio_dur_samples * train_proportion) 
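(A condensed sketch of the 60/20/20 proportional split that process_audio_and_annot performs on each recording, assuming audio is a 1-D numpy array; variable names here are illustrative:)

    import numpy as np

    audio = np.zeros(16000 * 60)                 # stand-in one-minute waveform at 16 kHz
    train_end = int(len(audio) * 0.6)            # train_proportion
    val_end = train_end + int(len(audio) * 0.2)  # val_proportion
    train_audio, val_audio, test_audio = np.split(audio, [train_end, val_end])
    # The selection-table times are shifted by the same offsets so that each
    # split's annotations are expressed relative to the start of its own audio.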
train_audio_dur_sec = train_audio_dur_samples / sr - + val_audio_dur_samples = int(audio_dur_samples * val_proportion) val_audio_dur_sec = val_audio_dur_samples / sr - + train_audio = audio[:train_audio_dur_samples] val_audio = audio[train_audio_dur_samples:train_audio_dur_samples+val_audio_dur_samples] test_audio = audio[train_audio_dur_samples+val_audio_dur_samples:] - + ys = [] for i, row in annot.iterrows(): y = 'voc' @@ -33,30 +33,30 @@ def process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion, # if row[label] == 'POS': # y = label ys.append(y) - + begin_time = list(annot['Starttime']) end_time = list(annot['Endtime']) - + selection_table = pd.DataFrame({'Begin Time (s)' : begin_time, 'End Time (s)' : end_time, 'Annotation' : ys, 'Low Freq (Hz)' : [low_hz for x in begin_time], 'High Freq (Hz)' : [high_hz for x in begin_time]}).drop_duplicates() - + train_selection_table = selection_table[selection_table['End Time (s)'] < train_audio_dur_sec].copy() val_selection_table = selection_table[(selection_table['End Time (s)'] >= train_audio_dur_sec) & (selection_table['End Time (s)'] < train_audio_dur_sec + val_audio_dur_sec)].copy() val_selection_table['Begin Time (s)'] = val_selection_table['Begin Time (s)'] - train_audio_dur_sec val_selection_table['Begin Time (s)'] = val_selection_table['Begin Time (s)'].map(lambda x : max(x, 0)) val_selection_table['End Time (s)'] = val_selection_table['End Time (s)'] - train_audio_dur_sec - + test_selection_table = selection_table[selection_table['Begin Time (s)'] >= train_audio_dur_sec + val_audio_dur_sec].copy() test_selection_table['Begin Time (s)'] = test_selection_table['Begin Time (s)'] - (train_audio_dur_sec + val_audio_dur_sec) test_selection_table['Begin Time (s)'] = test_selection_table['Begin Time (s)'].map(lambda x : max(x, 0)) test_selection_table['End Time (s)'] = test_selection_table['End Time (s)'] - (train_audio_dur_sec + val_audio_dur_sec) - + return train_selection_table, train_audio, val_selection_table, val_audio, test_selection_table, test_audio -def main(): +def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_audio_dir = os.path.join(formatted_data_dir, 'audio') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') @@ -64,65 +64,65 @@ def main(): if not os.path.exists(d): os.makedirs(d) - train_proportion = 0.6 + train_proportion = 0.6 val_proportion = 0.2 - + annotation_fns = sorted(glob(os.path.join(raw_data_dir, '*.csv'))) annotation_fns = [os.path.basename(x) for x in annotation_fns] audio_fns = sorted(glob(os.path.join(raw_data_dir, '*.wav'))) audio_fns = [os.path.basename(x) for x in audio_fns] - + train_fns = [] train_audio_fps = [] train_annot_fps = [] - + val_fns = [] val_audio_fps = [] val_annot_fps = [] - + test_fns = [] test_audio_fps = [] test_annot_fps = [] - + for annot_fn, audio_fn in tqdm.tqdm(zip(annotation_fns, audio_fns)): fn = annot_fn.split('.')[0] train_fns.append(f"{fn}_train") val_fns.append(f"{fn}_val") test_fns.append(f"{fn}_test") - + annot_fp = os.path.join(raw_data_dir, annot_fn) audio_fp = os.path.join(raw_data_dir, audio_fn) - + annot = pd.read_csv(annot_fp) audio, sr = sf.read(audio_fp) - + train_selection_table, train_audio, val_selection_table, val_audio, test_selection_table, test_audio = process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion) - + train_selection_table_fn = f"{annot_fn.split('.')[0]}_train.txt" train_selection_table_fp = 
os.path.join(formatted_annot_dir, train_selection_table_fn) train_selection_table.to_csv(train_selection_table_fp, sep = '\t', index = False) train_annot_fps.append(train_selection_table_fp) - + train_audio_fn = f"{audio_fn.split('.')[0]}_train.wav" train_audio_fp = os.path.join(formatted_audio_dir, train_audio_fn) sf.write(train_audio_fp, train_audio, sr) train_audio_fps.append(train_audio_fp) - + val_selection_table_fn = f"{annot_fn.split('.')[0]}_val.txt" val_selection_table_fp = os.path.join(formatted_annot_dir, val_selection_table_fn) val_selection_table.to_csv(val_selection_table_fp, sep = '\t', index = False) val_annot_fps.append(val_selection_table_fp) - + val_audio_fn = f"{audio_fn.split('.')[0]}_val.wav" val_audio_fp = os.path.join(formatted_audio_dir, val_audio_fn) sf.write(val_audio_fp, val_audio, sr) val_audio_fps.append(val_audio_fp) - + test_selection_table_fn = f"{annot_fn.split('.')[0]}_test.txt" test_selection_table_fp = os.path.join(formatted_annot_dir, test_selection_table_fn) test_selection_table.to_csv(test_selection_table_fp, sep = '\t', index = False) test_annot_fps.append(test_selection_table_fp) - + test_audio_fn = f"{audio_fn.split('.')[0]}_test.wav" test_audio_fp = os.path.join(formatted_audio_dir, test_audio_fn) sf.write(test_audio_fp, test_audio, sr) @@ -133,7 +133,7 @@ def main(): test_audio = None val_audio = None train_audio = None - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) diff --git a/datasets/hawaii/process_hawaii.py b/datasets/hawaii/process_hawaii.py index d20b408..14060b8 100644 --- a/datasets/hawaii/process_hawaii.py +++ b/datasets/hawaii/process_hawaii.py @@ -14,79 +14,78 @@ def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') audio_dir = os.path.join(cwd, 'soundscape_data') - + raw_annotations_fp = os.path.join(cwd, 'raw', 'annotations.csv') raw_annot_df = pd.read_csv(raw_annotations_fp) raw_annot_df['Annotation'] = raw_annot_df['Species eBird Code'] raw_annot_df = raw_annot_df.drop('Species eBird Code', axis=1) - + raw_annot_df['Begin Time (s)'] = raw_annot_df['Start Time (s)'] raw_annot_df = raw_annot_df.drop('Start Time (s)', axis=1) - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') for d in [formatted_data_dir, formatted_annot_dir]: if not os.path.exists(d): os.makedirs(d) - + train_proportion = 0.6 val_proportion = 0.2 - + train_audio_fps = [] val_audio_fps = [] test_audio_fps = [] - + for i in range(1,5): audio_fps = sorted(glob(os.path.join(audio_dir, f"*_S0{i}_*.{file_extension}"))) - #audio_fps = sorted(glob(os.path.join(audio_dir, f"*Recording_{i}_*.{file_extension}"))) n_train = int(train_proportion * len(audio_fps)) n_val = int(val_proportion * len(audio_fps)) - + train_audio_fps.extend(audio_fps[:n_train]) val_audio_fps.extend(audio_fps[n_train:n_train+n_val]) test_audio_fps.extend(audio_fps[n_train+n_val:]) - + train_fns = [os.path.basename(x).split('.')[0] for x in train_audio_fps] val_fns = [os.path.basename(x).split('.')[0] for x in val_audio_fps] test_fns = [os.path.basename(x).split('.')[0] for x in test_audio_fps] - + train_annot_fps = [] val_annot_fps = [] test_annot_fps = [] - + for fn, audio_fp in zip(train_fns, train_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = 
sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) train_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(val_fns, val_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) val_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(test_fns, test_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) test_annot_fps.append(annot_fp) - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) diff --git a/datasets/powdermill/process_powdermill.py b/datasets/powdermill/process_powdermill.py index ed47e56..2a52b7d 100644 --- a/datasets/powdermill/process_powdermill.py +++ b/datasets/powdermill/process_powdermill.py @@ -11,86 +11,86 @@ def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') raw_annot_dir = os.path.join(raw_data_dir, 'selection_tables') audio_dir = os.path.join(cwd, 'soundscape_data') - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') for d in [formatted_data_dir, formatted_annot_dir]: if not os.path.exists(d): os.makedirs(d) - + raw_annotations_fps = sorted(glob(os.path.join(cwd, 'raw', 'selection_tables', '*.txt'))) - + train_proportion = 0.6 val_proportion = 0.2 - + train_audio_fps = [] val_audio_fps = [] test_audio_fps = [] - + for i in range(1,5): audio_fps = sorted(glob(os.path.join(audio_dir, f"Recording_{i}_*"))) n_train = int(train_proportion * len(audio_fps)) n_val = int(val_proportion * len(audio_fps)) - + train_audio_fps.extend(audio_fps[:n_train]) val_audio_fps.extend(audio_fps[n_train:n_train+n_val]) test_audio_fps.extend(audio_fps[n_train+n_val:]) - - + + train_fns = [os.path.basename(x).split('.')[0] for x in train_audio_fps] val_fns = [os.path.basename(x).split('.')[0] for x in val_audio_fps] test_fns = [os.path.basename(x).split('.')[0] for x in test_audio_fps] - + train_annot_fps = [] val_annot_fps = [] test_annot_fps = [] - + for fn, audio_fp in zip(train_fns, train_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) train_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(val_fns, val_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = 
f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) val_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(test_fns, test_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) test_annot_fps.append(annot_fp) - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) @@ -100,7 +100,6 @@ def main(): test_info_df = pd.DataFrame({'fn' : test_fns, 'audio_fp' : test_audio_fps, 'selection_table_fp' : test_annot_fps}) test_info_fp = os.path.join(formatted_data_dir, 'test_info.csv') test_info_df.to_csv(test_info_fp, index = False) - breakpoint() if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/read_grid_search.py b/read_grid_search.py deleted file mode 100644 index de3001e..0000000 --- a/read_grid_search.py +++ /dev/null @@ -1,26 +0,0 @@ -import yaml -from os.path import join -import os -import pandas as pd - - -all_results = {} -for detthresh in (0.4, 0.55, 0.7, 0.85): - all_results[detthresh] = {} - for combiouthresh in (0.4, 0.55, 0.7, 0.85): - all_results[detthresh][combiouthresh] = {} - for combdiscardthresh in (0.4, 0.55, 0.7, 0.85): - all_results[detthresh][combiouthresh][combdiscardthresh] = {} - resdir = f'projects/MT_experiment/bidirectional-{detthresh}-{combiouthresh}-{combdiscardthresh}/test_results' - if not os.path.exists(resdir): - continue - results = {} - for iouf1 in (2,5,8): - with open(join(resdir, f'metrics_iou_0.{iouf1}_class_threshold_0.yaml')) as f: - exp_results = yaml.safe_load(f) - for pred_type in ('fwd','bck','comb','match'): - results[f'testiou{iouf1}-{pred_type}'] = exp_results[pred_type]['macro']['f1'] - all_results[detthresh][combiouthresh][combdiscardthresh] = results - -breakpoint() - diff --git a/run.sh b/run.sh deleted file mode 100644 index fc135c2..0000000 --- a/run.sh +++ /dev/null @@ -1 +0,0 @@ -python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=demo --lr=.00005 --batch-size=4 --n-epochs 4 diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index 8a63436..8e14871 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -24,7 +24,7 @@ def train_model(args): experiment_dir = os.path.join(args.project_dir, args.name) setattr(args, 'experiment_dir', str(experiment_dir)) - if os.path.exists(args.experiment_dir) and not args.overwrite: + if os.path.exists(args.experiment_dir) and not args.overwrite and args.name!='demo': sys.exit('experiment already exists with this name') os.makedirs(args.experiment_dir) From 73127332325e9d5f5b84f4f57743fef191f714d8 Mon Sep 17 00:00:00 2001 From: Lou1sM Date: Fri, 17 May 2024 17:18:29 +0100 Subject: [PATCH 10/11] pass cl-arg to run bidirectional, stereo and/or segmentation --- voxaboxen/evaluation/evaluation.py | 61 +++++++++++++++---------- voxaboxen/model/model.py | 23 ++++++++-- voxaboxen/training/params.py | 5 ++ voxaboxen/training/train.py | 73 
+++++++++++++++++------------- voxaboxen/training/train_model.py | 35 ++++++-------- 5 files changed, 116 insertions(+), 81 deletions(-) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index 97e8082..b6d5362 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -133,16 +133,18 @@ def generate_predictions(model, single_clip_dataloader, args, verbose = True): X = X.to(device = device, dtype = torch.float) X, _, _, _ = rms_and_mixup(X, None, None, None, False, args) - detection, regression, classif, rev_detection, rev_regression, rev_classif = model(X) - classif = torch.nn.functional.softmax(classif, dim=-1) - rev_classif = torch.nn.functional.softmax(rev_classif, dim=-1) - - all_detections.append(detection) - all_regressions.append(regression) - all_classifs.append(classif) - all_rev_detections.append(rev_detection) - all_rev_regressions.append(rev_regression) - all_rev_classifs.append(rev_classif) + model_outputs = model(X) + assert isinstance(model_outputs, tuple) + all_detections.append(model_outputs[0]) + all_regressions.append(model_outputs[1]) + all_classifs.append(model_outputs[2].softmax(-1)) + if model.is_bidirectional: + assert all(x is not None for x in model_outputs) + all_rev_detections.append(model_outputs[3]) + all_rev_regressions.append(model_outputs[4]) + all_rev_classifs.append(model_outputs[5].softmax(-1)) + else: + assert all(x is None for x in model_outputs[3:]) if args.is_test and i==15: break @@ -150,10 +152,10 @@ def generate_predictions(model, single_clip_dataloader, args, verbose = True): all_detections = torch.cat(all_detections) all_regressions = torch.cat(all_regressions) all_classifs = torch.cat(all_classifs) - all_rev_detections = torch.cat(all_rev_detections) - all_rev_regressions = torch.cat(all_rev_regressions) - all_rev_classifs = torch.cat(all_rev_classifs) - + if model.is_bidirectional: + all_rev_detections = torch.cat(all_rev_detections) + all_rev_regressions = torch.cat(all_rev_regressions) + all_rev_classifs = torch.cat(all_rev_classifs) ######## Todo: Need better checking that preds are the correct dur assert all_detections.size(dim=1) % 2 == 0 @@ -186,7 +188,10 @@ def assemble(d, r, c): return assembled_d.detach().cpu().numpy(), assembled_r.detach().cpu().numpy(), assembled_c.detach().cpu().numpy(), assembled_dets, assembled_regs, assembled_classifs = assemble(all_detections, all_regressions, all_classifs) - assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs = assemble(all_rev_detections, all_rev_regressions, all_rev_classifs) + if model.is_bidirectional: + assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs = assemble(all_rev_detections, all_rev_regressions, all_rev_classifs) + else: + assembled_rev_dets = assembled_rev_regs = assembled_rev_classifs = None return assembled_dets, assembled_regs, assembled_classifs, assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs def generate_features(model, single_clip_dataloader, args, verbose = True): @@ -419,7 +424,12 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fwd_detections, fwd_regressions, fwd_classifications, bck_detections, bck_regressions, bck_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) fwd_predictions_fp = export_to_selection_table(fwd_detections, fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose) - bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, 
bck_classifications, fn, args, is_bck=True, verbose=verbose) + if model.is_bidirectional: + assert all(x is not None for x in [bck_detections, bck_classifications, bck_regressions]) + bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose) + else: + assert all(x is None for x in [bck_detections, bck_classifications, bck_regressions]) + bck_predictions_fp = None annotations_fp = dataloader_dict[fn].dataset.annot_fp fns.append(fn) @@ -431,20 +441,21 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): return manifest def evaluate_based_on_manifest(manifest, args, output_dir, iou, class_threshold, comb_discard_threshold): - pred_types = ('fwd', 'bck', 'comb', 'match') + pred_types = ('fwd', 'bck', 'comb', 'match') if args.bidirectional else ('fwd',) metrics = {p:{} for p in pred_types} conf_mats = {p:{} for p in pred_types} conf_mat_labels = {} for i, row in manifest.iterrows(): - fn = row['filename'] - annots_fp = row['annotations_fp'] - row['comb_predictions_fp'], row['match_predictions_fp'] = combine_fwd_bck_preds(args.experiment_output_dir, fn, comb_iou_threshold=args.comb_iou_threshold, comb_discard_threshold=comb_discard_threshold) + fn = row['filename'] + annots_fp = row['annotations_fp'] + if args.bidirectional: + row['comb_predictions_fp'], row['match_predictions_fp'] = combine_fwd_bck_preds(args.experiment_output_dir, fn, comb_iou_threshold=args.comb_iou_threshold, comb_discard_threshold=comb_discard_threshold) - for pred_type in pred_types: - preds_fp = row[f'{pred_type}_predictions_fp'] - metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) - conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) + for pred_type in pred_types: + preds_fp = row[f'{pred_type}_predictions_fp'] + metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) + conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) if output_dir is not None: if not os.path.exists(output_dir): @@ -452,7 +463,7 @@ def evaluate_based_on_manifest(manifest, args, output_dir, iou, class_threshold, # summarize and save metrics conf_mat_summaries = {} - for pred_type in ('fwd', 'bck', 'comb', 'match'): + for pred_type in pred_types: summary = summarize_metrics(metrics[pred_type]) metrics[pred_type]['summary'] = summary metrics[pred_type]['macro'] = macro_metrics(summary) diff --git a/voxaboxen/model/model.py b/voxaboxen/model/model.py index 5fe4439..5f8a6a8 100644 --- a/voxaboxen/model/model.py +++ b/voxaboxen/model/model.py @@ -44,11 +44,17 @@ def unfreeze(self): class DetectionModel(nn.Module): def __init__(self, args, embedding_dim=768): super().__init__() + self.is_bidirectional = args.bidirectional + self.is_stereo = args.stereo + self.is_segmentation = args.segmentation + if self.is_stereo: + embedding_dim *= 2 self.encoder = AvesEmbedding(args) self.args = args aves_sr = args.sr // args.scale_factor self.detection_head = DetectionHead(args, embedding_dim = embedding_dim) - self.rev_detection_head = DetectionHead(args, embedding_dim = embedding_dim) + if self.is_bidirectional: + self.rev_detection_head = DetectionHead(args, embedding_dim = embedding_dim) def forward(self, x): """ @@ -64,7 +70,12 @@ def forward(self, x): expected_dur_output = math.ceil(x.size(1)/self.args.scale_factor) x = 
x-torch.mean(x,axis=1,keepdim=True) - feats = self.encoder(x) + if self.is_stereo: + feats0 = self.encoder(x[:,0,:]) + feats1 = self.encoder(x[:,1,:]) + feats = torch.cat([feats0,feats1],dim=-1) + else: + feats = self.encoder(x) #aves may be off by 1 sample from expected pad = expected_dur_output - feats.size(1) @@ -73,8 +84,11 @@ def forward(self, x): detection_logits, regression, class_logits = self.detection_head(feats) detection_probs = torch.sigmoid(detection_logits) - rev_detection_logits, rev_regression, rev_class_logits = self.rev_detection_head(feats) - rev_detection_probs = torch.sigmoid(rev_detection_logits) + if self.is_bidirectional: + rev_detection_logits, rev_regression, rev_class_logits = self.rev_detection_head(feats) + rev_detection_probs = torch.sigmoid(rev_detection_logits) + else: + rev_detection_probs = rev_regression = rev_class_logits = None return detection_probs, regression, class_logits, rev_detection_probs, rev_regression, rev_class_logits @@ -161,7 +175,6 @@ def forward(self, x): return detection_probs, regression, class_logits - def rms_and_mixup(X, d, r, y, train, args): if args.rms_norm: ms = torch.mean(X ** 2, dim = -1, keepdim = True) diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index b4846b0..3f0e239 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -23,6 +23,8 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--num-workers', type=int, default=8) # Model + parser.add_argument('--bidirectional', action='store_true', help="train and inference in both directions and combine results") + parser.add_argument('--segmentation', action='store_true') parser.add_argument('--sr', type=int, default=16000) parser.add_argument('--scale-factor', type=int, default = 320, help = "downscaling performed by aves") parser.add_argument('--aves-model-weight-fp', type=str, default = "weights/aves-base-bio.torchaudio.pt") @@ -77,6 +79,9 @@ def parse_args(args,allow_unknown=False): if args.clip_hop is None: setattr(args, "clip_hop", args.clip_duration/2) + if args.bidirectional and args.segmentation: + raise ValueError("bidirectional and segmentation settings are not currently compatible") + if allow_unknown: return args, remaining else: diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index d7e9db8..adb7a90 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -65,7 +65,7 @@ def train(model, args): if use_val: eval_scores = val_epoch(model, t, val_dataloader, args) # TODO: maybe plot evals for other pred_types - val_evals.append(eval_scores['comb'].copy()) + val_evals.append(eval_scores['fwd'].copy()) plot_eval(train_evals, learning_rates, args, val_evals=val_evals) val_evals_by_epoch = {i : e for i, e in enumerate(val_evals)} @@ -77,7 +77,7 @@ def train(model, args): scheduler.step() if use_val and args.early_stopping: - current_f1 = eval_scores['comb']['f1'] + current_f1 = eval_scores['comb']['f1'] if model.is_bidirectional else eval_scores['fwd']['f1'] if current_f1 > best_f1: print('found new best model') best_f1 = current_f1 @@ -119,7 +119,7 @@ def train(model, args): # resave validation with best model if use_val: - val_epoch(model, t+1, val_dataloader, args) + val_epoch(model, args.n_epochs, val_dataloader, args) return model @@ -153,52 +153,63 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss evals = {} - normal_train_loss = 0; normal_losses = []; detection_losses = []; regression_losses = []; class_losses = [] + 
train_loss = 0; losses = []; detection_losses = []; regression_losses = []; class_losses = [] rev_train_loss = 0; rev_losses = []; rev_detection_losses = []; rev_regression_losses = []; rev_class_losses = [] - train_loss = 0; losses = [] + data_iterator = tqdm.tqdm(dataloader) - for i, (X, d, r, y, rev_d, rev_r, rev_y) in enumerate(data_iterator): + #for i, (X, d, r, y, rev_d, rev_r, rev_y) in enumerate(data_iterator): + for i, batch in enumerate(data_iterator): num_batches_seen = i - X = X.to(device = device, dtype = torch.float) - d = d.to(device = device, dtype = torch.float) - r = r.to(device = device, dtype = torch.float) - y = y.to(device = device, dtype = torch.float) - rev_d = rev_d.to(device = device, dtype = torch.float) - rev_r = rev_r.to(device = device, dtype = torch.float) - rev_y = rev_y.to(device = device, dtype = torch.float) - - X, d, r, y = rms_and_mixup(X, d, r, y, True, args) - _, rev_d, rev_r, rev_y = rms_and_mixup(X, rev_d, rev_r, rev_y, True, args) - probs, regression, class_logits, rev_probs, rev_regression, rev_class_logits = model(X) - + batch = [item.to(device, dtype=torch.float) for item in batch] + X, d, r, y = batch[:4] + #X = X.to(device = device, dtype = torch.float) + #d = d.to(device = device, dtype = torch.float) + #r = r.to(device = device, dtype = torch.float) + #y = y.to(device = device, dtype = torch.float) # We mask out loss from each end of the clip, so the model isn't forced to learn to detect events that are partially cut off. # This does not affect inference, because during inference we overlap clips at 50% - + X, d, r, y = rms_and_mixup(X, d, r, y, True, args) + probs, regression, class_logits, rev_probs, rev_regression, rev_class_logits = model(X) + #model_outputs = model(X) + #probs, regression, class_logits = model_outputs[:3] detection_loss, reg_loss, class_loss = lf(d, probs, r, regression, y, class_logits, args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) - rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_y, rev_class_logits, args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) - normal_loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss - rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss - loss = (normal_loss + rev_loss)/2 + loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss train_loss += loss.item() - rev_train_loss += rev_loss.item() - normal_train_loss += normal_loss.item() - normal_losses.append(normal_loss.item()) - rev_losses.append(rev_loss.item()) losses.append(loss.item()) detection_losses.append(detection_loss.item()) regression_losses.append(args.lamb * reg_loss.item()) class_losses.append(args.rho * class_loss.item()) - rev_detection_losses.append(rev_detection_loss.item()) - rev_regression_losses.append(args.lamb * rev_reg_loss.item()) - rev_class_losses.append(args.rho * rev_class_loss.item()) + + pbar_str = f"loss {np.mean(losses[-10:]):.5f}, det {np.mean(detection_losses[-10:]):.5f}, reg {np.mean(regression_losses[-10:]):.5f}, class {np.mean(class_losses[-10:]):.5f}" + + if model.is_bidirectional: + assert all(x is not None for x in [rev_probs, rev_regression, rev_class_logits]) + rev_d, rev_r, rev_y = batch[4:] + #rev_probs, rev_regression, rev_class_logits = model_outputs[3:] + _, rev_d, rev_r, rev_y = rms_and_mixup(X, rev_d, rev_r, rev_y, True, args) + + + rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_y, rev_class_logits, 
args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) + rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss + rev_train_loss += rev_loss.item() + rev_losses.append(rev_loss.item()) + rev_detection_losses.append(rev_detection_loss.item()) + rev_regression_losses.append(args.lamb * rev_reg_loss.item()) + rev_class_losses.append(args.rho * rev_class_loss.item()) + loss = (loss + rev_loss)/2 + + pbar_str += f" revloss {np.mean(rev_losses[-10:]):.5f}, revdet {np.mean(rev_detection_losses[-10:]):.5f}, revreg {np.mean(rev_regression_losses[-10:]):.5f}, revclass {np.mean(rev_class_losses[-10:]):.5f}" + else: + assert all(x is None for x in [rev_probs, rev_regression, rev_class_logits]) + optimizer.zero_grad() loss.backward() optimizer.step() if i > 10: - data_iterator.set_description(f"loss {np.mean(losses[-10:]):.5f}, det {np.mean(detection_losses[-10:]):.5f}, reg {np.mean(regression_losses[-10:]):.5f}, class {np.mean(class_losses[-10:]):.5f} revloss {np.mean(rev_losses[-10:]):.5f}, revdet {np.mean(rev_detection_losses[-10:]):.5f}, revreg {np.mean(rev_regression_losses[-10:]):.5f}, revclass {np.mean(rev_class_losses[-10:]):.5f}") + data_iterator.set_description(pbar_str) if args.is_test and i == 15: break diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index 8e14871..7db1315 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -34,20 +34,14 @@ def train_model(args): os.makedirs(args.experiment_output_dir) save_params(args) - if hasattr(args,'stereo') and args.stereo: - model = DetectionModelStereo(args) - else: - model = DetectionModel(args) + model = DetectionModel(args) if args.reload_from is not None: checkpoint = torch.load(os.path.join(args.project_dir, args.reload_from, 'model.pt')) model.load_state_dict(checkpoint['model_state_dict']) ## Training - if args.n_epochs == 0: - trained_model = model - else: - trained_model = train(model, args) + trained_model = train(model, args) ## Evaluation test_dataloader = get_test_dataloader(args) @@ -55,21 +49,22 @@ def train_model(args): val_manifest = predict_and_generate_manifest(trained_model, val_dataloader, args) - best_comb_discard_thresh = -1 - best_f1 = 0 - for comb_discard_thresh in [.3,.35,.4,.45,.5,.55,.6,.65,.75,.8,.85,.9]: - val_metrics, val_conf_mats = evaluate_based_on_manifest(val_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=0.5, class_threshold=0.5, comb_discard_threshold=comb_discard_thresh) - new_f1 = val_metrics['comb']['macro']['f1'] - if new_f1 > best_f1: - best_comb_discard_thresh = comb_discard_thresh - best_f1 = new_f1 - print(f'IOU: 0.5 class_thresh: 0.5 Comb discard threshold: {comb_discard_thresh}') - print_metrics(val_metrics, just_one_label=(len(args.label_set)==1)) + model.comb_discard_thresh = -1 + if model.is_bidirectional: + best_f1 = 0 + for comb_discard_thresh in [.3,.35,.4,.45,.5,.55,.6,.65,.75,.8,.85,.9]: + val_metrics, val_conf_mats = evaluate_based_on_manifest(val_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=0.5, class_threshold=0.5, comb_discard_threshold=comb_discard_thresh) + new_f1 = val_metrics['comb']['macro']['f1'] + if new_f1 > best_f1: + model.comb_discard_thresh = comb_discard_thresh + best_f1 = new_f1 + print(f'IOU: 0.5 class_thresh: 0.5 Comb discard threshold: {comb_discard_thresh}') + print_metrics(val_metrics, just_one_label=(len(args.label_set)==1)) + print(f'Using comb_discard_thresh: 
{model.comb_discard_thresh}') test_manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) - print(f'Using thresh: {best_comb_discard_thresh}') for iou in [0.2, 0.5, 0.8]: - test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=best_comb_discard_thresh) + test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=model.comb_discard_thresh) print(f'Test with IOU{iou}') print_metrics(test_metrics, just_one_label=(len(args.label_set)==1)) From cfcfe5055950986f0824fc692b69509ba38de059 Mon Sep 17 00:00:00 2001 From: Lou1sM Date: Sat, 18 May 2024 11:49:37 +0100 Subject: [PATCH 11/11] handle loading of maybe-bidirectional model at inference --- voxaboxen/evaluation/evaluation.py | 10 +++--- voxaboxen/inference/inference.py | 53 +++++++++++++++++------------- voxaboxen/inference/params.py | 7 ++-- voxaboxen/model/model.py | 1 + voxaboxen/training/train_model.py | 21 ++++++------ 5 files changed, 52 insertions(+), 40 deletions(-) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index b6d5362..8ddbb9f 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -227,7 +227,7 @@ def generate_features(model, single_clip_dataloader, args, verbose = True): return all_features.detach().cpu().numpy() -def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, classif_threshold=0): +def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, detection_threshold=0, classif_threshold=0): if target_dir is None: target_dir = args.experiment_output_dir @@ -248,7 +248,7 @@ def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=Tr # np.save(target_fp, classifs) ## peaks - det_peaks, properties = find_peaks(dets, height=args.detection_threshold, distance=args.peak_distance) + det_peaks, properties = find_peaks(dets, height=detection_threshold, distance=args.peak_distance) det_probs = properties['peak_heights'] ## regs and classifs @@ -278,7 +278,7 @@ def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=Tr bboxes, det_probs, class_idxs, class_probs = pred2bbox(det_peaks, det_probs, durations, class_idxs, class_probs, pred_sr, is_bck) if args.nms == "soft_nms": - bboxes, det_probs, class_idxs, class_probs = soft_nms(bboxes, det_probs, class_idxs, class_probs, sigma=args.soft_nms_sigma, thresh=args.detection_threshold) + bboxes, det_probs, class_idxs, class_probs = soft_nms(bboxes, det_probs, class_idxs, class_probs, sigma=args.soft_nms_sigma, thresh=detection_threshold) elif args.nms == "nms": bboxes, det_probs, class_idxs, class_probs = nms(bboxes, det_probs, class_idxs, class_probs, iou_thresh=args.nms_thresh) @@ -423,10 +423,10 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): for fn in dataloader_dict: fwd_detections, fwd_regressions, fwd_classifications, bck_detections, bck_regressions, bck_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) - fwd_predictions_fp = export_to_selection_table(fwd_detections, fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose) + fwd_predictions_fp = export_to_selection_table(fwd_detections, 
fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose, detection_threshold=args.detection_threshold)
         if model.is_bidirectional:
             assert all(x is not None for x in [bck_detections, bck_classifications, bck_regressions])
-            bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose)
+            bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose, detection_threshold=args.detection_threshold)
         else:
             assert all(x is None for x in [bck_detections, bck_classifications, bck_regressions])
             bck_predictions_fp = None
diff --git a/voxaboxen/inference/inference.py b/voxaboxen/inference/inference.py
index f223587..adfe9ce 100644
--- a/voxaboxen/inference/inference.py
+++ b/voxaboxen/inference/inference.py
@@ -6,54 +6,56 @@
 from voxaboxen.inference.params import parse_inference_args
 from voxaboxen.training.params import load_params
 from voxaboxen.model.model import DetectionModel, DetectionModelStereo
-from voxaboxen.evaluation.evaluation import generate_predictions, export_to_selection_table
+from voxaboxen.evaluation.evaluation import generate_predictions, export_to_selection_table, combine_fwd_bck_preds
 from voxaboxen.data.data import get_single_clip_data
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 def inference(inference_args):
     inference_args = parse_inference_args(inference_args)
-    args = load_params(inference_args.model_args_fp) 
+    args = load_params(inference_args.model_args_fp)
     files_to_infer = pd.read_csv(inference_args.file_info_for_inference)
-    
+
     output_dir = os.path.join(args.experiment_dir, 'inference')
     if not os.path.exists(output_dir):
-        os.makedirs(output_dir)
-        
-    # model
-    if hasattr(args,'stereo') and args.stereo:
-        model = DetectionModelStereo(args)
-    else:
-        model = DetectionModel(args)
-    model_checkpoint_fp = os.path.join(args.experiment_dir, "model.pt")
+        os.makedirs(output_dir)
+
+    # model
+    model = DetectionModel(args)
+    model_checkpoint_fp = os.path.join(args.experiment_dir, "final-model.pt")
     print(f"Loading model weights from {model_checkpoint_fp}")
     cp = torch.load(model_checkpoint_fp)
-    model.load_state_dict(cp["model_state_dict"])
+    model.load_state_dict(cp)
     model = model.to(device)
-    
+
     for i, row in files_to_infer.iterrows():
         audio_fp = row['audio_fp']
         fn = row['fn']
-        
+
         if not os.path.exists(audio_fp):
             print(f"Could not locate file {audio_fp}")
             continue
-        
+
         try:
             dataloader = get_single_clip_data(audio_fp, args.clip_duration/2, args)
         except:
             print(f"Could not load file {audio_fp}")
             continue
-        
+
         if len(dataloader) == 0:
             print(f"Skipping {fn} because it is too short")
             continue
-        
-        detections, regressions, classifications = generate_predictions(model, dataloader, args, verbose = True)
-        
-        target_fp = export_to_selection_table(detections, regressions, classifications, fn, args, verbose=True, target_dir=output_dir, detection_threshold = inference_args.detection_threshold, classification_threshold = inference_args.classification_threshold)
-        
-        print(f"Saving predictions for {fn} to {target_fp}")
+
+        if inference_args.disable_bidirectional and not model.is_bidirectional:
+            print('Warning: you passed --disable-bidirectional but the model is not bidirectional')
+        detections, regressions, classifs, rev_detections, rev_regressions, rev_classifs = generate_predictions(model, dataloader, args, verbose = True)
+        fwd_target_fp = export_to_selection_table(detections, regressions, classifs, fn, args, is_bck=False, verbose=True, target_dir=output_dir, detection_threshold=inference_args.detection_threshold, classif_threshold=inference_args.classification_threshold)
+        if model.is_bidirectional and not inference_args.disable_bidirectional:
+            rev_target_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifs, fn, args, is_bck=True, verbose=True, target_dir=output_dir, detection_threshold=inference_args.detection_threshold, classif_threshold=inference_args.classification_threshold)
+            comb_target_fp, match_target_fp = combine_fwd_bck_preds(output_dir, fn, comb_iou_threshold=args.comb_iou_threshold, comb_discard_threshold=model.comb_discard_thresh.item())
+            print(f"Saving predictions for {fn} to {comb_target_fp}")
+        else:
+            print(f"Saving predictions for {fn} to {fwd_target_fp}")
 
 if __name__ == "__main__":
     main(sys.argv[1:])
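Editor's note on the inference changes above: combine_fwd_bck_preds is where the forward (onset-anchored) and backward (offset-anchored) selection tables meet, but the patch does not show its internals. The sketch below is only a toy illustration of one plausible scheme, under the assumption that it matches the two box sets by 1-D IoU and applies the discard threshold to unmatched boxes; iou_1d and merge_events are made-up names, not voxaboxen functions.

# Toy merge of forward and backward event sets; NOT the voxaboxen implementation.
def iou_1d(a, b):
    # a, b: (start, end) intervals in seconds
    inter = max(0.0, min(a[1], b[1]) - max(a[0], b[0]))
    union = (a[1] - a[0]) + (b[1] - b[0]) - inter
    return inter / union if union > 0 else 0.0

def merge_events(fwd, bck, iou_thresh=0.5, discard_thresh=0.45):
    # fwd, bck: lists of ((start, end), prob)
    merged, matched_bck = [], set()
    for box_f, p_f in fwd:
        best_j = max(range(len(bck)), key=lambda j: iou_1d(box_f, bck[j][0]), default=None)
        if best_j is not None and iou_1d(box_f, bck[best_j][0]) >= iou_thresh:
            # matched pair: take the onset from the forward box, the offset
            # from the backward box, and the more confident probability
            matched_bck.add(best_j)
            box_b, p_b = bck[best_j]
            merged.append(((box_f[0], box_b[1]), max(p_f, p_b)))
        elif p_f >= discard_thresh:
            merged.append((box_f, p_f))  # unmatched boxes survive only above the threshold
    merged += [bck[j] for j in range(len(bck)) if j not in matched_bck and bck[j][1] >= discard_thresh]
    return merged

print(merge_events([((0.0, 1.0), 0.9)], [((0.1, 1.1), 0.8)]))
# -> [((0.0, 1.1), 0.9)]

Whatever the exact matching rule, this is the intuition behind tuning comb_discard_thresh on validation: it controls how confident an unpaired one-directional detection must be to survive the merge.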
diff --git a/voxaboxen/inference/params.py b/voxaboxen/inference/params.py
index e6c5274..b54b3c2 100644
--- a/voxaboxen/inference/params.py
+++ b/voxaboxen/inference/params.py
@@ -5,11 +5,12 @@
 def parse_inference_args(inference_args):
     parser = argparse.ArgumentParser()
-    
+
     parser.add_argument('--model-args-fp', type=str, required=True, help = "filepath of model params saved as a yaml")
     parser.add_argument('--file-info-for-inference', type=str, required=True, help = "filepath of info csv listing filenames and filepaths of audio for inference")
     parser.add_argument('--detection-threshold', type=float, default=0.5, help="detection peaks need to be at or above this threshold to make it into the exported selection table")
     parser.add_argument('--classification-threshold', type=float, default=0.0, help="classification probability needs to be at or above this threshold to not be labeled as Unknown")
-    
-    inference_args = parser.parse_args(inference_args)
+    parser.add_argument('--disable-bidirectional', action='store_true', help="use only the forward detection head at inference, even if the model was trained bidirectionally")
+
+    inference_args = parser.parse_args(inference_args)
     return inference_args
diff --git a/voxaboxen/model/model.py b/voxaboxen/model/model.py
index 5f8a6a8..6d472de 100644
--- a/voxaboxen/model/model.py
+++ b/voxaboxen/model/model.py
@@ -53,6 +53,7 @@ def __init__(self, args, embedding_dim=768):
         self.args = args
         aves_sr = args.sr // args.scale_factor
         self.detection_head = DetectionHead(args, embedding_dim = embedding_dim)
+        self.comb_discard_thresh = nn.Parameter(torch.tensor(0.))
         if self.is_bidirectional:
             self.rev_detection_head = DetectionHead(args, embedding_dim = embedding_dim)
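Editor's note on the model.py hunk above: anything assigned to an nn.Module attribute as an nn.Parameter is registered with the module and therefore included in state_dict(), which is exactly what lets the threshold tuned at train time ride along inside final-model.pt and come back out at inference. A minimal, self-contained sketch of that round trip (TinyDetector is a hypothetical stand-in, not voxaboxen code):

import torch
import torch.nn as nn

class TinyDetector(nn.Module):
    def __init__(self):
        super().__init__()
        self.head = nn.Linear(8, 1)  # stand-in for the detection head
        # Registered as a Parameter, so it is serialized with the weights
        self.comb_discard_thresh = nn.Parameter(torch.tensor(0.))

model = TinyDetector()
# Assigning a new Parameter re-registers it under the same name,
# mirroring what train_model.py does after the validation sweep
model.comb_discard_thresh = nn.Parameter(torch.tensor(0.45))
print(sorted(model.state_dict().keys()))
# -> ['comb_discard_thresh', 'head.bias', 'head.weight']

restored = TinyDetector()
restored.load_state_dict(model.state_dict())
assert abs(restored.comb_discard_thresh.item() - 0.45) < 1e-6

Since the threshold is never updated by the loss, register_buffer('comb_discard_thresh', torch.tensor(0.)) would arguably be the more conventional choice: it serializes identically but keeps the value out of model.parameters() and away from the optimizer.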
diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py
index 7db1315..2f68b92 100644
--- a/voxaboxen/training/train_model.py
+++ b/voxaboxen/training/train_model.py
@@ -1,12 +1,12 @@
+import torch.nn as nn
 import pandas as pd
-from voxaboxen.data.data import get_test_dataloader, get_val_dataloader
 import torch
-from voxaboxen.model.model import DetectionModel, DetectionModelStereo
+from voxaboxen.data.data import get_test_dataloader, get_val_dataloader
+from voxaboxen.model.model import DetectionModel
 from voxaboxen.training.train import train
 from voxaboxen.training.params import parse_args, set_seed, save_params
-from voxaboxen.evaluation.evaluation import generate_predictions, export_to_selection_table, get_metrics, summarize_metrics, predict_and_generate_manifest, evaluate_based_on_manifest
+from voxaboxen.evaluation.evaluation import predict_and_generate_manifest, evaluate_based_on_manifest
 
-import yaml
 import sys
 import os
 
@@ -36,9 +36,6 @@ def train_model(args):
     save_params(args)
 
     model = DetectionModel(args)
-    if args.reload_from is not None:
-        checkpoint = torch.load(os.path.join(args.project_dir, args.reload_from, 'model.pt'))
-        model.load_state_dict(checkpoint['model_state_dict'])
 
     ## Training
     trained_model = train(model, args)
@@ -49,14 +46,13 @@ def train_model(args):
 
     val_manifest = predict_and_generate_manifest(trained_model, val_dataloader, args)
 
-    model.comb_discard_thresh = -1
     if model.is_bidirectional:
         best_f1 = 0
         for comb_discard_thresh in [.3,.35,.4,.45,.5,.55,.6,.65,.75,.8,.85,.9]:
             val_metrics, val_conf_mats = evaluate_based_on_manifest(val_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=0.5, class_threshold=0.5, comb_discard_threshold=comb_discard_thresh)
             new_f1 = val_metrics['comb']['macro']['f1']
             if new_f1 > best_f1:
-                model.comb_discard_thresh = comb_discard_thresh
+                model.comb_discard_thresh = nn.Parameter(torch.tensor(comb_discard_thresh))
                 best_f1 = new_f1
             print(f'IOU: 0.5 class_thresh: 0.5 Comb discard threshold: {comb_discard_thresh}')
             print_metrics(val_metrics, just_one_label=(len(args.label_set)==1))
@@ -64,10 +60,12 @@
 
     test_manifest = predict_and_generate_manifest(trained_model, test_dataloader, args)
     for iou in [0.2, 0.5, 0.8]:
-        test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=model.comb_discard_thresh)
+        test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=model.comb_discard_thresh.item())
         print(f'Test with IOU{iou}')
         print_metrics(test_metrics, just_one_label=(len(args.label_set)==1))
 
+    torch.save(model.state_dict(), os.path.join(args.experiment_dir, 'final-model.pt'))
+
 if __name__ == "__main__":
     train_model(sys.argv[1:])
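A final note on the train_model.py hunk above: the combined-prediction discard threshold is chosen by a plain grid search on validation macro F1 before being frozen into the checkpoint. A compressed sketch of that selection logic, where eval_f1 is a hypothetical stand-in for the score evaluate_based_on_manifest returns (the real call evaluates a full prediction manifest, not a closed-form function):

import torch
import torch.nn as nn

def eval_f1(thresh: float) -> float:
    # toy objective that peaks at 0.6; stands in for validation macro F1
    return 1.0 - (thresh - 0.6) ** 2

# same candidate grid as the patch
candidates = [.3, .35, .4, .45, .5, .55, .6, .65, .75, .8, .85, .9]
best_f1 = 0.0
comb_discard_thresh = nn.Parameter(torch.tensor(0.))
for thresh in candidates:
    f1 = eval_f1(thresh)
    if f1 > best_f1:
        # wrap the winner as a Parameter so it lands in the saved state_dict
        comb_discard_thresh = nn.Parameter(torch.tensor(thresh))
        best_f1 = f1

# .item() unwraps the 0-d tensor back to a Python float for downstream use,
# matching the comb_discard_thresh.item() calls in the patch
print(round(comb_discard_thresh.item(), 2), round(best_f1, 3))  # -> 0.6 1.0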