From ae89c6a9cbb6def226ab787a942b60a2fb2a8fd4 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Tue, 9 Apr 2024 11:22:43 +0100 Subject: [PATCH 01/11] get each direction working separately --- run.sh | 1 + voxaboxen/data/data.py | 143 +++++++------ voxaboxen/evaluation/evaluation.py | 318 ++++++++++++++++------------- voxaboxen/model/model.py | 71 ++++--- voxaboxen/training/params.py | 37 ++-- voxaboxen/training/train.py | 238 ++++++++++++--------- voxaboxen/training/train_model.py | 20 +- 7 files changed, 461 insertions(+), 367 deletions(-) create mode 100644 run.sh diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..fc135c2 --- /dev/null +++ b/run.sh @@ -0,0 +1 @@ +python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=demo --lr=.00005 --batch-size=4 --n-epochs 4 diff --git a/voxaboxen/data/data.py b/voxaboxen/data/data.py index 620c614..924ce86 100644 --- a/voxaboxen/data/data.py +++ b/voxaboxen/data/data.py @@ -1,9 +1,7 @@ -import os import math import numpy as np import pandas as pd import librosa -import warnings from numpy.random import default_rng from intervaltree import IntervalTree @@ -16,16 +14,16 @@ def normalize_sig_np(sig, eps=1e-8): sig = sig / (np.max(np.abs(sig))+eps) return sig - + def crop_and_pad(wav, sr, dur_sec): # crops and pads waveform to be the expected number of samples; used after resampling to ensure proper size target_dur_samples = int(sr * dur_sec) wav = wav[..., :target_dur_samples] - + pad = target_dur_samples - wav.size(-1) if pad > 0: wav = F.pad(wav, (0,pad)) #padding starts from last dims - + return wav class DetectionDataset(Dataset): @@ -47,7 +45,7 @@ def __init__(self, info_df, train, args, random_seed_shift = 0): if self.amp_aug: self.amp_aug_low_r = args.amp_aug_low_r self.amp_aug_high_r = args.amp_aug_high_r - assert (self.amp_aug_low_r >= 0) #and (self.amp_aug_high_r <= 1) and + assert (self.amp_aug_low_r >= 0) #and (self.amp_aug_high_r <= 1) and assert (self.amp_aug_low_r <= self.amp_aug_high_r) self.scale_factor = args.scale_factor @@ -59,14 +57,14 @@ def __init__(self, info_df, train, args, random_seed_shift = 0): self.mono = False else: self.mono = True - + if self.train: self.omit_empty_clip_prob = args.omit_empty_clip_prob self.clip_start_offset = self.rng.integers(0, np.floor(self.clip_hop*self.sr)) / self.sr else: self.omit_empty_clip_prob = 0 self.clip_start_offset = 0 - + # make metadata self.make_metadata() @@ -86,15 +84,15 @@ def process_selection_table(self, selection_table_fp): start = row['Begin Time (s)'] end = row['End Time (s)'] label = row['Annotation'] - + if end<=start: continue - + if label in self.label_mapping: label = self.label_mapping[label] else: continue - + if label == self.unknown_label: label_idx = -1 else: @@ -110,7 +108,7 @@ def make_metadata(self): for ii, row in self.info_df.iterrows(): fn = row['fn'] audio_fp = row['audio_fp'] - + duration = librosa.get_duration(path=audio_fp) selection_table_fp = row['selection_table_fp'] @@ -141,10 +139,10 @@ def get_pos_intervals(self, fn, start, end): intervals = [(max(iv.begin, start)-start, min(iv.end, end)-start, iv.data) for iv in intervals] return intervals - + def get_class_proportions(self): counts = np.zeros((self.n_classes,)) - + for k in self.selection_table_dict: st = self.selection_table_dict[k] for interval in st: @@ -153,91 +151,101 @@ def get_class_proportions(self): continue else: counts[annot] += 1 - + total_count = np.sum(counts) proportions = counts / total_count - + return proportions - def 
get_annotation(self, pos_intervals, audio): - raw_seq_len = audio.shape[-1] seq_len = int(math.ceil(raw_seq_len / self.scale_factor_raw_to_prediction)) - regression_anno = np.zeros((seq_len,)) - class_anno = np.zeros((seq_len, self.n_classes)) - anno_sr = int(self.sr // self.scale_factor_raw_to_prediction) - + + regression_annos = np.zeros((seq_len,)) + class_annos = np.zeros((seq_len, self.n_classes)) anchor_annos = [np.zeros(seq_len,)] + rev_regression_annos = np.zeros((seq_len,)) + rev_class_annos = np.zeros((seq_len, self.n_classes)) + rev_anchor_annos = [np.zeros(seq_len,)] for iv in pos_intervals: start, end, class_idx = iv dur = end-start - + dur_samples = np.ceil(dur * anno_sr) + start_idx = int(math.floor(start*anno_sr)) start_idx = max(min(start_idx, seq_len-1), 0) - dur_samples = np.ceil(dur * anno_sr) - + end_idx = int(math.floor(end*anno_sr)) + end_idx = max(min(end_idx, seq_len-1), 0) + anchor_anno = get_anchor_anno(start_idx, dur_samples, seq_len) anchor_annos.append(anchor_anno) - regression_anno[start_idx] = dur + regression_annos[start_idx] = dur + rev_anchor_anno = get_anchor_anno(end_idx, dur_samples, seq_len) + rev_anchor_annos.append(rev_anchor_anno) + rev_regression_annos[end_idx] = dur if class_idx != -1: - class_anno[start_idx, class_idx] = 1. + class_annos[start_idx, class_idx] = 1. + rev_class_annos[end_idx, class_idx] = 1. else: - class_anno[start_idx, :] = 1./self.n_classes # if unknown, enforce uncertainty - + class_annos[start_idx, :] = 1./self.n_classes # if unknown, enforce uncertainty + rev_class_annos[end_idx, :] = 1./self.n_classes # if unknown, enforce uncertainty + anchor_annos = np.stack(anchor_annos) anchor_annos = np.amax(anchor_annos, axis = 0) - - return anchor_annos, regression_anno, class_anno # shapes [time_steps, ], [time_steps, ], [time_steps, n_classes] + rev_anchor_annos = np.stack(rev_anchor_annos) + rev_anchor_annos = np.amax(rev_anchor_annos, axis = 0) + # shapes [time_steps, ], [time_steps, ], [time_steps, n_classes] + return anchor_annos, regression_annos, class_annos, rev_anchor_annos, rev_regression_annos, rev_class_annos def __getitem__(self, index): fn, audio_fp, start, end = self.metadata[index] - - audio, file_sr = librosa.load(audio_fp, sr=None, offset=start, duration=self.clip_duration, mono=self.mono) + + audio, file_sr = librosa.load(audio_fp, sr=None, offset=start, duration=self.clip_duration, mono=self.mono) audio = torch.from_numpy(audio) - + audio = audio-torch.mean(audio, -1, keepdim=True) if self.amp_aug and self.train: audio = self.augment_amplitude(audio) if file_sr != self.sr: - audio = torchaudio.functional.resample(audio, file_sr, self.sr) - + audio = torchaudio.functional.resample(audio, file_sr, self.sr) + audio = crop_and_pad(audio, self.sr, self.clip_duration) - + pos_intervals = self.get_pos_intervals(fn, start, end) - anchor_anno, regression_anno, class_anno = self.get_annotation(pos_intervals, audio) + anchor_anno, regression_anno, class_anno, rev_anchor_anno, rev_regression_anno, rev_class_anno = self.get_annotation(pos_intervals, audio) - return audio, torch.from_numpy(anchor_anno), torch.from_numpy(regression_anno), torch.from_numpy(class_anno) + return audio, torch.from_numpy(anchor_anno), torch.from_numpy(regression_anno), torch.from_numpy(class_anno), torch.from_numpy(rev_anchor_anno), torch.from_numpy(rev_regression_anno), torch.from_numpy(rev_class_anno) def __len__(self): return len(self.metadata) - - + + def get_train_dataloader(args, random_seed_shift = 0): train_info_fp = args.train_info_fp 
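+    # train info csv is expected to have one row per audio file, with columns fn, audio_fp, selection_table_fp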
train_info_df = pd.read_csv(train_info_fp) - + train_dataset = DetectionDataset(train_info_df, True, args, random_seed_shift = random_seed_shift) - + if args.mixup: effective_batch_size = args.batch_size*2 # double batch size because half will be discarded before being passed to model else: effective_batch_size = args.batch_size - - + + train_dataloader = DataLoader(train_dataset, - batch_size=effective_batch_size, + batch_size=effective_batch_size, shuffle=True, - num_workers=args.num_workers, - pin_memory=True, + #num_workers=args.num_workers, + num_workers=0, + pin_memory=True, drop_last = True) - + return train_dataloader - + class SingleClipDataset(Dataset): def __init__(self, audio_fp, clip_hop, args, annot_fp = None): # waveform (samples,) @@ -253,26 +261,26 @@ def __init__(self, audio_fp, clip_hop, args, annot_fp = None): self.mono = False else: self.mono = True - + def __len__(self): return self.num_clips def __getitem__(self, idx): """ Map int idx to dict of torch tensors """ start = idx * self.clip_hop - + audio, file_sr = librosa.load(self.audio_fp, sr=None, offset=start, duration=self.clip_duration, mono=self.mono) audio = torch.from_numpy(audio) - - + + audio = audio-torch.mean(audio, -1, keepdim=True) if file_sr != self.sr: - audio = torchaudio.functional.resample(audio, file_sr, self.sr) - + audio = torchaudio.functional.resample(audio, file_sr, self.sr) + audio = crop_and_pad(audio, self.sr, self.clip_duration) - + return audio - + def get_single_clip_data(audio_fp, clip_hop, args, annot_fp = None): return DataLoader( SingleClipDataset(audio_fp, clip_hop, args, annot_fp = annot_fp), @@ -284,33 +292,33 @@ def get_single_clip_data(audio_fp, clip_hop, args, annot_fp = None): ) def get_val_dataloader(args): - val_info_fp = args.val_info_fp + val_info_fp = args.val_info_fp val_info_df = pd.read_csv(val_info_fp) - + val_dataloaders = {} - + for i in range(len(val_info_df)): fn = val_info_df.iloc[i]['fn'] audio_fp = val_info_df.iloc[i]['audio_fp'] annot_fp = val_info_df.iloc[i]['selection_table_fp'] val_dataloaders[fn] = get_single_clip_data(audio_fp, args.clip_duration/2, args, annot_fp = annot_fp) - + return val_dataloaders def get_test_dataloader(args): test_info_fp = args.test_info_fp test_info_df = pd.read_csv(test_info_fp) - + test_dataloaders = {} - + for i in range(len(test_info_df)): fn = test_info_df.iloc[i]['fn'] audio_fp = test_info_df.iloc[i]['audio_fp'] annot_fp = test_info_df.iloc[i]['selection_table_fp'] test_dataloaders[fn] = get_single_clip_data(audio_fp, args.clip_duration/2, args, annot_fp = annot_fp) - + return test_dataloaders - + def get_anchor_anno(start_idx, dur_samples, seq_len): # start times plus gaussian blur # std setting follows CornerNet, where adaptive standard deviation is set to 1/3 image radius @@ -319,4 +327,5 @@ def get_anchor_anno(start_idx, dur_samples, seq_len): x = x / (2 * std**2) x = np.exp(-x) return x - \ No newline at end of file + + diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index 4896ca1..6b3691f 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -15,7 +15,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu" -def pred2bbox(detection_peaks, detection_probs, durations, class_idxs, class_probs, pred_sr): +def pred2bbox(detection_peaks, detection_probs, durations, class_idxs, class_probs, pred_sr, is_rev): ''' detection_peaks, detection_probs, durations, class_idxs, class_probs : shape=(num_frames,) @@ -29,23 +29,27 @@ def 
pred2bbox(detection_peaks, detection_probs, durations, class_idxs, class_pro detection_probs_sub = [] class_idxs_sub = [] class_probs_sub = [] - + for i in range(len(detection_peaks)): duration = durations[i] - start = detection_peaks[i] - if duration <= 0: continue - - bbox = [start, start+duration] + + if is_rev: + end = detection_peaks[i] + bbox = [end-duration, end] + else: + start = detection_peaks[i] + bbox = [start, start+duration] + bboxes.append(bbox) - + detection_probs_sub.append(detection_probs[i]) class_idxs_sub.append(class_idxs[i]) class_probs_sub.append(class_probs[i]) - + return np.array(bboxes), np.array(detection_probs_sub), np.array(class_idxs_sub), np.array(class_probs_sub) - + def bbox2raven(bboxes, class_idxs, label_set, detection_probs, class_probs, unknown_label): ''' output bounding boxes to a selection table @@ -55,18 +59,18 @@ def bbox2raven(bboxes, class_idxs, label_set, detection_probs, class_probs, unkn bboxes: numpy array shape=(num_bboxes, 2) - + class_idxs: numpy array shape=(num_bboxes,) label_set: list - + detection_probs: numpy array shape =(num_bboxes,) - + class_probs: numpy array shape = (num_bboxes,) - + unknown_label: str ''' @@ -74,14 +78,14 @@ def bbox2raven(bboxes, class_idxs, label_set, detection_probs, class_probs, unkn return [['Begin Time (s)', 'End Time (s)', 'Annotation', 'Detection Prob', 'Class Prob']] columns = ['Begin Time (s)', 'End Time (s)', 'Annotation', 'Detection Prob', 'Class Prob'] - - + + def label_idx_to_label(i): if i==-1: return unknown_label else: return label_set[i] - + out_data = [[bbox[0], bbox[1], label_idx_to_label(int(c)), dp, cp] for bbox, c, dp, cp in zip(bboxes, class_idxs, detection_probs, class_probs)] out_data = sorted(out_data, key=lambda x: x[:2]) @@ -110,74 +114,88 @@ def generate_predictions(model, single_clip_dataloader, args, verbose = True): model = model.to(device) model.eval() - + all_detections = [] all_regressions = [] - all_classifications = [] - + all_classifs = [] + all_rev_detections = [] + all_rev_regressions = [] + all_rev_classifs = [] + if verbose: iterator = tqdm.tqdm(enumerate(single_clip_dataloader), total=len(single_clip_dataloader)) else: iterator = enumerate(single_clip_dataloader) - + with torch.no_grad(): for i, X in iterator: X = X.to(device = device, dtype = torch.float) X, _, _, _ = rms_and_mixup(X, None, None, None, False, args) - - detection, regression, classification = model(X) - classification = torch.nn.functional.softmax(classification, dim=-1) - + + detection, regression, classif, rev_detection, rev_regression, rev_classif = model(X) + classif = torch.nn.functional.softmax(classif, dim=-1) + rev_classif = torch.nn.functional.softmax(rev_classif, dim=-1) + all_detections.append(detection) all_regressions.append(regression) - all_classifications.append(classification) - + all_classifs.append(classif) + all_rev_detections.append(rev_detection) + all_rev_regressions.append(rev_regression) + all_rev_classifs.append(rev_classif) + all_detections = torch.cat(all_detections) all_regressions = torch.cat(all_regressions) - all_classifications = torch.cat(all_classifications) + all_classifs = torch.cat(all_classifs) + all_rev_detections = torch.cat(all_rev_detections) + all_rev_regressions = torch.cat(all_rev_regressions) + all_rev_classifs = torch.cat(all_rev_classifs) - # we use half overlapping windows, need to throw away boundary predictions - # See get_val_dataloader and get_test_dataloader in data.py - - ######## Todo: Need better checking that preds are the correct dur + + 
######## Todo: Need better checking that preds are the correct dur assert all_detections.size(dim=1) % 2 == 0 first_quarter_window_dur_samples=all_detections.size(dim=1)//4 last_quarter_window_dur_samples=(all_detections.size(dim=1)//2)-first_quarter_window_dur_samples - - # assemble detections - beginning_bit = all_detections[0,:first_quarter_window_dur_samples] - end_bit = all_detections[-1,-last_quarter_window_dur_samples:] - detections_clipped = all_detections[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples] - all_detections = torch.reshape(detections_clipped, (-1,)) - all_detections = torch.cat([beginning_bit, all_detections, end_bit]) - - # assemble regressions - beginning_bit = all_regressions[0,:first_quarter_window_dur_samples] - end_bit = all_regressions[-1,-last_quarter_window_dur_samples:] - regressions_clipped = all_regressions[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples] - all_regressions = torch.reshape(regressions_clipped, (-1,)) - all_regressions = torch.cat([beginning_bit, all_regressions, end_bit]) - - # assemble classifications - beginning_bit = all_classifications[0,:first_quarter_window_dur_samples, :] - end_bit = all_classifications[-1,-last_quarter_window_dur_samples:, :] - classifications_clipped = all_classifications[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples,:] - all_classifications = torch.reshape(classifications_clipped, (-1, classifications_clipped.size(-1))) - all_classifications = torch.cat([beginning_bit, all_classifications, end_bit]) - - return all_detections.detach().cpu().numpy(), all_regressions.detach().cpu().numpy(), all_classifications.detach().cpu().numpy() + + def assemble(d, r, c): + """We use half overlapping windows, need to throw away boundary predictions. 
+ See get_val_dataloader and get_test_dataloader in data.py""" + # assemble detections + beginning_d_bit = d[0,:first_quarter_window_dur_samples] + end_d_bit = d[-1,-last_quarter_window_dur_samples:] + d_clipped = d[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples] + middle_d_bit = torch.reshape(d_clipped, (-1,)) + assembled_d = torch.cat([beginning_d_bit, middle_d_bit, end_d_bit]) + + # assemble regressions + beginning_r_bit = r[0,:first_quarter_window_dur_samples] + end_r_bit = r[-1,-last_quarter_window_dur_samples:] + r_clipped = r[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples] + middle_r_bit = torch.reshape(r_clipped, (-1,)) + assembled_r = torch.cat([beginning_r_bit, middle_r_bit, end_r_bit]) + + # assemble classifs + beginning_c_bit = c[0,:first_quarter_window_dur_samples, :] + end_c_bit = c[-1,-last_quarter_window_dur_samples:, :] + c_clipped = c[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples,:] + middle_c_bit = torch.reshape(c_clipped, (-1, c_clipped.size(-1))) + assembled_c = torch.cat([beginning_c_bit, middle_c_bit, end_c_bit]) + return assembled_d.detach().cpu().numpy(), assembled_r.detach().cpu().numpy(), assembled_c.detach().cpu().numpy(), + + assembled_dets, assembled_regs, assembled_classifs = assemble(all_detections, all_regressions, all_classifs) + assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs = assemble(all_rev_detections, all_rev_regressions, all_rev_classifs) + return assembled_dets, assembled_regs, assembled_classifs, assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs def generate_features(model, single_clip_dataloader, args, verbose = True): model = model.to(device) model.eval() - + all_features = [] - + if verbose: iterator = tqdm.tqdm(enumerate(single_clip_dataloader), total=len(single_clip_dataloader)) else: iterator = enumerate(single_clip_dataloader) - + with torch.no_grad(): for i, X in iterator: X = X.to(device = device, dtype = torch.float) @@ -185,127 +203,130 @@ def generate_features(model, single_clip_dataloader, args, verbose = True): features = model.generate_features(X) all_features.append(features) all_features = torch.cat(all_features) - - ######## Need better checking that features are the correct dur + + ######## Need better checking that features are the correct dur assert all_features.size(dim=1) % 2 == 0 first_quarter_window_dur_samples=all_features.size(dim=1)//4 last_quarter_window_dur_samples=(all_features.size(dim=1)//2)-first_quarter_window_dur_samples - + # assemble features beginning_bit = all_features[0,:first_quarter_window_dur_samples,:] end_bit = all_features[-1,-last_quarter_window_dur_samples:,:] features_clipped = all_features[:,first_quarter_window_dur_samples:-last_quarter_window_dur_samples,:] all_features = torch.reshape(features_clipped, (-1, features_clipped.size(-1))) all_features = torch.cat([beginning_bit, all_features, end_bit]) - + return all_features.detach().cpu().numpy() -def export_to_selection_table(detections, regressions, classifications, fn, args, verbose=True, target_dir=None, detection_threshold = 0.5, classification_threshold = 0): - +#def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=True, target_dir=None, det_threshold=0.5, classif_threshold=0): +def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=True, target_dir=None, classif_threshold=0): + if target_dir is None: - target_dir = args.experiment_output_dir + target_dir = args.experiment_output_dir + if is_rev: + fn 
+= '-rev' # Debugging # -# target_fp = os.path.join(target_dir, f"detections_{fn}.npy") -# np.save(target_fp, detections) - -# target_fp = os.path.join(target_dir, f"regressions_{fn}.npy") -# np.save(target_fp, regressions) - -# target_fp = os.path.join(target_dir, f"classifications_{fn}.npy") -# np.save(target_fp, classifications) - - ## peaks - detection_peaks, properties = find_peaks(detections, height = detection_threshold, distance=args.peak_distance) - detection_probs = properties['peak_heights'] - - ## regressions and classifications +# target_fp = os.path.join(target_dir, f"dets_{fn}.npy") +# np.save(target_fp, dets) + +# target_fp = os.path.join(target_dir, f"regs_{fn}.npy") +# np.save(target_fp, regs) + +# target_fp = os.path.join(target_dir, f"classifs_{fn}.npy") +# np.save(target_fp, classifs) + + ## peaks + det_peaks, properties = find_peaks(dets, height=args.detection_threshold, distance=args.peak_distance) + det_probs = properties['peak_heights'] + + ## regs and classifs durations = [] class_idxs = [] class_probs = [] - - for i in detection_peaks: - dur = regressions[i] + + for i in det_peaks: + dur = regs[i] durations.append(dur) - - c = np.argmax(classifications[i,:]) - p = classifications[i,c] - - if p < classification_threshold: + + c = np.argmax(classifs[i,:]) + p = classifs[i,c] + + if p < classif_threshold: c = -1 - + class_idxs.append(c) class_probs.append(p) - + durations = np.array(durations) class_idxs = np.array(class_idxs) class_probs = np.array(class_probs) - + pred_sr = args.sr // (args.scale_factor * args.prediction_scale_factor) - - bboxes, detection_probs, class_idxs, class_probs = pred2bbox(detection_peaks, detection_probs, durations, class_idxs, class_probs, pred_sr) - + + bboxes, det_probs, class_idxs, class_probs = pred2bbox(det_peaks, det_probs, durations, class_idxs, class_probs, pred_sr, is_rev) + if args.nms == "soft_nms": - bboxes, detection_probs, class_idxs, class_probs = soft_nms(bboxes, detection_probs, class_idxs, class_probs, sigma = args.soft_nms_sigma, thresh = args.detection_threshold) + bboxes, det_probs, class_idxs, class_probs = soft_nms(bboxes, det_probs, class_idxs, class_probs, sigma=args.soft_nms_sigma, thresh=args.detection_threshold) elif args.nms == "nms": - bboxes, detection_probs, class_idxs, class_probs = nms(bboxes, detection_probs, class_idxs, class_probs, iou_thresh = args.nms_thresh) - + bboxes, det_probs, class_idxs, class_probs = nms(bboxes, det_probs, class_idxs, class_probs, iou_thresh=args.nms_thresh) + if verbose: - print(f"Found {len(detection_probs)} boxes") - + print(f"Found {len(det_probs)} boxes") + target_fp = os.path.join(target_dir, f"peaks_pred_{fn}.txt") - - st = bbox2raven(bboxes, class_idxs, args.label_set, detection_probs, class_probs, args.unknown_label) + + st = bbox2raven(bboxes, class_idxs, args.label_set, det_probs, class_probs, args.unknown_label) write_tsv(target_fp, st) - + return target_fp - + def get_metrics(predictions_fp, annotations_fp, args, iou, class_threshold): c = Clip(label_set=args.label_set, unknown_label=args.unknown_label) - + c.load_predictions(predictions_fp) c.threshold_class_predictions(class_threshold) c.load_annotations(annotations_fp, label_mapping = args.label_mapping) - + metrics = {} - + c.compute_matching(IoU_minimum = iou) metrics = c.evaluate() - + return metrics def get_confusion_matrix(predictions_fp, annotations_fp, args, iou, class_threshold): c = Clip(label_set=args.label_set, unknown_label=args.unknown_label) - + c.load_predictions(predictions_fp) 
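+    # predictions whose class prob falls below class_threshold are relabeled as unknown before matching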
c.threshold_class_predictions(class_threshold) c.load_annotations(annotations_fp, label_mapping = args.label_mapping) - + confusion_matrix = {} - + c.compute_matching(IoU_minimum = iou) confusion_matrix, confusion_matrix_labels = c.confusion_matrix() - + return confusion_matrix, confusion_matrix_labels def summarize_metrics(metrics): # metrics (dict) : {fp : fp_metrics} # where # fp_metrics (dict) : {class_label: {'TP': int, 'FP' : int, 'FN' : int}} - + fps = sorted(metrics.keys()) class_labels = sorted(metrics[fps[0]].keys()) - + overall = { l: {'TP' : 0, 'FP' : 0, 'FN' : 0} for l in class_labels} - + for fp in fps: for l in class_labels: counts = metrics[fp][l] overall[l]['TP'] += counts['TP'] overall[l]['FP'] += counts['FP'] overall[l]['FN'] += counts['FN'] - + for l in class_labels: tp = overall[l]['TP'] fp = overall[l]['FP'] @@ -328,16 +349,16 @@ def summarize_metrics(metrics): else: f1 = 2*prec*rec / (prec + rec) overall[l]['f1'] = f1 - + return overall def macro_metrics(summary): # summary (dict) : {class_label: {'f1' : float, 'precision' : float, 'recall' : float, 'TP': int, 'FP' : int, 'FN' : int}} - + metrics = ['f1', 'precision', 'recall'] - + macro = {} - + for metric in metrics: e = [] @@ -345,11 +366,11 @@ def macro_metrics(summary): m = summary[l][metric] e.append(m) macro[metric] = float(np.mean(e)) - + return macro def plot_confusion_matrix(data, label_names, target_dir, name=""): - + fig = plt.figure(num=None, figsize=(12, 8), dpi=80, facecolor='w', edgecolor='k') plt.clf() ax = fig.add_subplot(111) @@ -363,7 +384,7 @@ def plot_confusion_matrix(data, label_names, target_dir, name=""): ax.set_ylabel('Prediction') ax.set_xlabel('Annotation') plt.title(name) - + plt.savefig(os.path.join(target_dir, f"{name}_confusion_matrix.svg")) plt.close() @@ -372,66 +393,79 @@ def summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels): # confusion_matrix (dict) : {fp : fp_cm} # where # fp_cm : numpy array - + fps = sorted(confusion_matrix.keys()) l = len(confusion_matrix_labels) - + overall = np.zeros((l, l)) - + for fp in fps: overall += confusion_matrix[fp] - + return overall, confusion_matrix_labels def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fns = [] predictions_fps = [] + rev_predictions_fps = [] annotations_fps = [] - + for fn in dataloader_dict: - detections, regressions, classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) - - predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, verbose = verbose, detection_threshold = args.detection_threshold) - + detections, regressions, classifications, rev_detections, rev_regressions, rev_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) + + predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, is_rev=False, verbose=verbose) + rev_predictions_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifications, fn, args, is_rev=True, verbose=verbose) + annotations_fp = dataloader_dict[fn].dataset.annot_fp - + fns.append(fn) predictions_fps.append(predictions_fp) + rev_predictions_fps.append(rev_predictions_fp) annotations_fps.append(annotations_fp) - - manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'annotations_fp' : annotations_fps}) + + manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'annotations_fp' : 
annotations_fps}) return manifest - + def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0): - + metrics = {} confusion_matrix = {} - + rev_metrics = {} + rev_confusion_matrix = {} + for i, row in manifest.iterrows(): fn = row['filename'] predictions_fp = row['predictions_fp'] + rev_predictions_fp = row['rev_predictions_fp'] annotations_fp = row['annotations_fp'] - + metrics[fn] = get_metrics(predictions_fp, annotations_fp, args, iou, class_threshold) + rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) confusion_matrix[fn], confusion_matrix_labels = get_confusion_matrix(predictions_fp, annotations_fp, args, iou, class_threshold) - + rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) + if output_dir is not None: if not os.path.exists(output_dir): os.makedirs(output_dir) - + # summarize and save metrics summary = summarize_metrics(metrics) metrics['summary'] = summary macro = macro_metrics(summary) metrics['macro'] = macro + rev_summary = summarize_metrics(rev_metrics) + rev_metrics['summary'] = rev_summary + rev_macro = macro_metrics(rev_summary) + rev_metrics['macro'] = rev_macro if output_dir is not None: metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') with open(metrics_fp, 'w') as f: yaml.dump(metrics, f) - + # summarize and save confusion matrix confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) + rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) if output_dir is not None: - plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") - - return metrics, confusion_matrix_summary + plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") + + return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary diff --git a/voxaboxen/model/model.py b/voxaboxen/model/model.py index b53ee2f..5fe4439 100644 --- a/voxaboxen/model/model.py +++ b/voxaboxen/model/model.py @@ -32,7 +32,7 @@ def forward(self, sig): out = self.model.extract_features(sig)[0][-1] return out - + def freeze(self): for param in self.model.encoder.parameters(): param.requires_grad = False @@ -40,7 +40,7 @@ def freeze(self): def unfreeze(self): for param in self.model.encoder.parameters(): param.requires_grad = True - + class DetectionModel(nn.Module): def __init__(self, args, embedding_dim=768): super().__init__() @@ -48,7 +48,8 @@ def __init__(self, args, embedding_dim=768): self.args = args aves_sr = args.sr // args.scale_factor self.detection_head = DetectionHead(args, embedding_dim = embedding_dim) - + self.rev_detection_head = DetectionHead(args, embedding_dim = embedding_dim) + def forward(self, x): """ Input @@ -59,22 +60,24 @@ def forward(self, x): class_logits (Tensor): (batch, time, n_classes) (time at 50 Hz, aves_sr) """ - + expected_dur_output = math.ceil(x.size(1)/self.args.scale_factor) - + x = x-torch.mean(x,axis=1,keepdim=True) feats = self.encoder(x) - + #aves may be off by 1 sample from expected pad = expected_dur_output - feats.size(1) if pad>0: feats = F.pad(feats, (0,0,0,pad), mode='reflect') - + detection_logits, 
regression, class_logits = self.detection_head(feats) detection_probs = torch.sigmoid(detection_logits) - - return detection_probs, regression, class_logits - + rev_detection_logits, rev_regression, rev_class_logits = self.rev_detection_head(feats) + rev_detection_probs = torch.sigmoid(rev_detection_logits) + + return detection_probs, regression, class_logits, rev_detection_probs, rev_regression, rev_class_logits + def generate_features(self, x): """ Input @@ -82,22 +85,22 @@ def generate_features(self, x): Returns features (Tensor): (batch, time) (time at 50 Hz, aves_sr) """ - + expected_dur_output = math.ceil(x.size(-1)/self.args.scale_factor) - + x = x-torch.mean(x,axis=-1,keepdim=True) feats = self.encoder(x) - + #aves may be off by 1 sample from expected pad = expected_dur_output - feats.size(1) if pad>0: feats = F.pad(feats, (0,0,0,pad), mode='reflect') - + return feats - + def freeze_encoder(self): self.encoder.freeze() - + def unfreeze_encoder(self): self.encoder.unfreeze() @@ -107,7 +110,7 @@ def __init__(self, args, embedding_dim=768): self.n_classes = len(args.label_set) self.head = nn.Conv1d(embedding_dim, 2+self.n_classes, args.prediction_scale_factor, stride=args.prediction_scale_factor, padding=0) self.args=args - + def forward(self, x): """ Input @@ -121,15 +124,15 @@ def forward(self, x): x = rearrange(x, 'b t c -> b c t') x = self.head(x) x = rearrange(x, 'b c t -> b t c') - detection_logits = x[:,:,0] + detection_logits = x[:,:,0] reg = x[:,:,1] class_logits = x[:,:,2:] return detection_logits, reg, class_logits - + class DetectionModelStereo(DetectionModel): def __init__(self, args, embedding_dim=768): super().__init__(args, embedding_dim=2*embedding_dim) - + def forward(self, x): """ Input @@ -140,9 +143,9 @@ def forward(self, x): class_logits (Tensor): (batch, time, n_classes) (time at 50 Hz, aves_sr) """ - + expected_dur_output = math.ceil(x.size(-1)/self.args.scale_factor) - + x = x-torch.mean(x,axis=-1,keepdim=True) feats0 = self.encoder(x[:,0,:]) feats1 = self.encoder(x[:,1,:]) @@ -152,12 +155,12 @@ def forward(self, x): pad = expected_dur_output - feats.size(1) if pad>0: feats = F.pad(feats, (0,0,0,pad), mode='reflect') - + detection_logits, regression, class_logits = self.detection_head(feats) detection_probs = torch.sigmoid(detection_logits) - + return detection_probs, regression, class_logits - + def rms_and_mixup(X, d, r, y, train, args): if args.rms_norm: @@ -165,31 +168,31 @@ def rms_and_mixup(X, d, r, y, train, args): ms = ms + torch.full_like(ms, 1e-6) rms = ms ** (-1/2) X = X * rms - + if args.mixup and train: # TODO: For mixup, add in a check that there aren't extremely overlapping vocs - + batch_size = X.size(0) - + mask = torch.full((X.size(0),1,1), 0.5, device=X.device) mask = torch.bernoulli(mask) - + if len(X.size()) == 2: X_aug = torch.flip(X, (0,)) * mask[:,:,0] elif len(X.size()) == 3: X_aug = torch.flip(X, (0,)) * mask - + d_aug = torch.flip(d, (0,)) * mask[:,:,0] r_aug = torch.flip(r, (0,)) * mask[:,:,0] y_aug = torch.flip(y, (0,)) * mask - + X = (X + X_aug)[:batch_size//2,...] d = torch.maximum(d, d_aug)[:batch_size//2,...] r = torch.maximum(r, r_aug)[:batch_size//2,...] y = torch.maximum(y, y_aug)[:batch_size//2,...] 
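+        # mixed pairs keep events from both clips: targets combine via elementwise max, and the batch is halved since clips were summed in pairs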
- + if args.rms_norm: X = X * (1/2) - + return X, d, r, y - + diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index 4c01610..6af6ccd 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -8,10 +8,11 @@ def parse_args(args,allow_unknown=False): parser = argparse.ArgumentParser() - + # General parser.add_argument('--name', type = str, required=True) parser.add_argument('--seed', type=int, default=0) + parser.add_argument('--is_test', '-t', action='store_true') # Data parser.add_argument('--project-config-fp', type = str, required=True) @@ -19,7 +20,7 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--clip-hop', type=float, default=None, help = "clip hop, in seconds. If None, automatically set to be half clip duration. Used only during training; clip hop is automatically set to be 1/2 clip duration for inference") parser.add_argument('--train-info-fp', type=str, required=False, help = "train info, to override project train info") parser.add_argument('--num-workers', type=int, default=8) - + # Model parser.add_argument('--sr', type=int, default=16000) parser.add_argument('--scale-factor', type=int, default = 320, help = "downscaling performed by aves") @@ -33,11 +34,11 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--stereo', action='store_true', help="If passed, will process stereo data as stereo") # Training - parser.add_argument('--batch-size', type=int, default=32) - parser.add_argument('--lr', type=float, default=.00005) + parser.add_argument('--batch-size', type=int, default=32) + parser.add_argument('--lr', type=float, default=.00005) parser.add_argument('--n-epochs', type=int, default=50) parser.add_argument('--unfreeze-encoder-epoch', type=int, default=3) - parser.add_argument('--end-mask-perc', type=float, default = 0.1, help="During training, mask loss from a percentage of the frames on each end of the clip") + parser.add_argument('--end-mask-perc', type=float, default = 0.1, help="During training, mask loss from a percentage of the frames on each end of the clip") parser.add_argument('--omit-empty-clip-prob', type=float, default=0, help="if a clip has no annotations, do not use for training with this probability") parser.add_argument('--lamb', type=float, default=.04, help="parameter controlling strength regression loss") parser.add_argument('--rho', type=float, default = .01, help="parameter controlling strength of classification loss") @@ -47,31 +48,31 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--early-stopping', action ="store_true", help="Whether to use early stopping based on val performance") parser.add_argument('--pos-loss-weight', type=float, default=1, help="Weights positive component of loss") - + # Augmentations - parser.add_argument('--amp-aug', action ="store_true", help="Whether to use amplitude augmentation") - parser.add_argument('--amp-aug-low-r', type=float, default = 0.8) - parser.add_argument('--amp-aug-high-r', type=float, default = 1.2) - parser.add_argument('--mixup', action ="store_true", help="Whether to use mixup augmentation") - + parser.add_argument('--amp-aug', action ="store_true", help="Whether to use amplitude augmentation") + parser.add_argument('--amp-aug-low-r', type=float, default = 0.8) + parser.add_argument('--amp-aug-high-r', type=float, default = 1.2) + parser.add_argument('--mixup', action ="store_true", help="Whether to use mixup augmentation") + # Inference parser.add_argument('--peak-distance', type=float, default=5, help="for 
finding peaks in detection probability, what radius to use for detecting local maxima. In output frame rate.") parser.add_argument('--nms', type = str, default='soft_nms', choices = ['none', 'nms', 'soft_nms'], help="Whether to apply additional nms after finding peaks") parser.add_argument('--soft-nms-sigma', type = float, default = 0.5) parser.add_argument('--soft-nms-thresh', type = float, default = 0.001) parser.add_argument('--nms-thresh', type = float, default = 0.5) - + if allow_unknown: args, remaining = parser.parse_known_args(args) else: args = parser.parse_args(args) - + args = read_config(args) check_config(args) if args.clip_hop is None: setattr(args, "clip_hop", args.clip_duration/2) - + if allow_unknown: return args, remaining else: @@ -80,10 +81,10 @@ def parse_args(args,allow_unknown=False): def read_config(args): with open(args.project_config_fp, 'r') as f: project_config = yaml.safe_load(f) - + for key in project_config: setattr(args,key,project_config[key]) - + return args def set_seed(seed): @@ -104,7 +105,7 @@ def save_params(args): with open(params_file, "w") as f: yaml.dump(args_dict, f) - + def load_params(fp): with open(fp, 'r') as f: args_dict = yaml.safe_load(f) @@ -118,4 +119,4 @@ def load_params(fp): def check_config(args): assert args.end_mask_perc < 0.25, "Masking above 25% of each end during training will interfere with inference" - assert ((args.clip_duration * args.sr)/(4*args.scale_factor)).is_integer(), "Must pick clip duration to ensure no rounding errors during inference" \ No newline at end of file + assert ((args.clip_duration * args.sr)/(4*args.scale_factor)).is_integer(), "Must pick clip duration to ensure no rounding errors during inference" diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index 5bb7987..6a230c4 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -21,25 +21,25 @@ def train(model, args): model = model.to(device) - + if args.previous_checkpoint_fp is not None: print(f"loading model weights from {args.previous_checkpoint_fp}") cp = torch.load(args.previous_checkpoint_fp) model.load_state_dict(cp["model_state_dict"]) - + detection_loss_fn = modified_focal_loss reg_loss_fn = get_reg_loss_fn(args) - + class_loss_fn = get_class_loss_fn(args) - + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, amsgrad = True) # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.step_size, gamma=0.1, last_epoch=- 1, verbose=False) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.n_epochs, eta_min=0, last_epoch=- 1, verbose=False) - + train_evals = [] learning_rates = [] val_evals = [] - + if args.early_stopping: assert args.val_info_fp is not None best_f1 = 0 @@ -49,24 +49,25 @@ def train(model, args): use_val = True else: use_val = False - + for t in range(args.n_epochs): print(f"Epoch {t}\n-------------------------------") train_dataloader = get_train_dataloader(args, random_seed_shift = t) # reinitialize dataloader with different negatives each epoch model, train_eval = train_epoch(model, t, train_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, optimizer, args) train_evals.append(train_eval.copy()) learning_rates.append(optimizer.param_groups[0]["lr"]) - + train_evals_by_epoch = {i : e for i, e in enumerate(train_evals)} train_evals_fp = os.path.join(args.experiment_dir, "train_history.yaml") with open(train_evals_fp, 'w') as f: yaml.dump(train_evals_by_epoch, f) - + if use_val: - val_eval = val_epoch(model, t, val_dataloader, detection_loss_fn, 
reg_loss_fn, class_loss_fn, args) + val_eval, rev_eval = val_epoch(model, t, val_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args) + # TODO: maybe plot rev-evals val_evals.append(val_eval.copy()) plot_eval(train_evals, learning_rates, args, val_evals = val_evals) - + val_evals_by_epoch = {i : e for i, e in enumerate(val_evals)} val_evals_fp = os.path.join(args.experiment_dir, "val_history.yaml") with open(val_evals_fp, 'w') as f: @@ -74,7 +75,7 @@ def train(model, args): else: plot_eval(train_evals, learning_rates, args) scheduler.step() - + if use_val and args.early_stopping: current_f1 = val_eval['f1'] if current_f1 > best_f1: @@ -89,13 +90,13 @@ def train(model, args): "train_evals": train_evals, "val_evals" : val_evals } - + torch.save( checkpoint_dict, os.path.join(args.experiment_dir, f"model.pt"), - ) - - else: + ) + + else: checkpoint_dict = { "epoch": t, "model_state_dict": model.state_dict(), @@ -104,110 +105,155 @@ def train(model, args): "train_evals": train_evals, "val_evals" : val_evals } - + torch.save( checkpoint_dict, os.path.join(args.experiment_dir, f"model.pt"), - ) - - + ) + + print("Done!") - + cp = torch.load(os.path.join(args.experiment_dir, f"model.pt")) model.load_state_dict(cp["model_state_dict"]) - + # resave validation with best model if use_val: val_epoch(model, t+1, val_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args) - - return model - + + return model + +def lf(dets, det_preds, regs, reg_preds, y, y_preds, args, reg_loss_fn): + end_mask_perc = args.end_mask_perc + end_mask_dur = int(det_preds.size(1)*end_mask_perc) + + det_preds_clipped = det_preds[:,end_mask_dur:-end_mask_dur] + dets_clipped = dets[:,end_mask_dur:-end_mask_dur] + + reg_preds_clipped = reg_preds[:,end_mask_dur:-end_mask_dur] + regs_clipped = regs[:,end_mask_dur:-end_mask_dur] + + #y_preds_clipped = y_preds[:,end_mask_dur:-end_mask_dur,:] + y_clipped = y[:,end_mask_dur:-end_mask_dur,:] + + detection_loss = modified_focal_loss(det_preds_clipped, dets_clipped, pos_loss_weight=args.pos_loss_weight) + reg_loss = reg_loss_fn(reg_preds_clipped, regs_clipped, dets_clipped, y_clipped) + #class_loss = class_loss_fn(y_preds_clipped, y_clipped, dets_clipped) + class_loss = torch.tensor(0) + return detection_loss, reg_loss, class_loss + def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, optimizer, args): model.train() if t < args.unfreeze_encoder_epoch: model.freeze_encoder() else: model.unfreeze_encoder() - - + + evals = {} - train_loss = 0; losses = []; detection_losses = []; regression_losses = []; class_losses = [] + normal_train_loss = 0; normal_losses = []; detection_losses = []; regression_losses = []; class_losses = [] + rev_train_loss = 0; rev_losses = []; rev_detection_losses = []; rev_regression_losses = []; rev_class_losses = [] + train_loss = 0; losses = [] data_iterator = tqdm.tqdm(dataloader) - for i, (X, d, r, y) in enumerate(data_iterator): + for i, (X, d, r, y, rev_d, rev_r, rev_y) in enumerate(data_iterator): num_batches_seen = i X = X.to(device = device, dtype = torch.float) d = d.to(device = device, dtype = torch.float) r = r.to(device = device, dtype = torch.float) y = y.to(device = device, dtype = torch.float) - + rev_d = rev_d.to(device = device, dtype = torch.float) + rev_r = rev_r.to(device = device, dtype = torch.float) + rev_y = rev_y.to(device = device, dtype = torch.float) + X, d, r, y = rms_and_mixup(X, d, r, y, True, args) - probs, regression, class_logits = model(X) - + _, rev_d, rev_r, rev_y = 
rms_and_mixup(X, rev_d, rev_r, rev_y, True, args)
+        probs, regression, class_logits, rev_probs, rev_regression, rev_class_logits = model(X)
+
         # We mask out loss from each end of the clip, so the model isn't forced to learn to detect events that are partially cut off.
         # This does not affect inference, because during inference we overlap clips at 50%
-
-        end_mask_perc = args.end_mask_perc
-        end_mask_dur = int(probs.size(1)*end_mask_perc)
-
-        d_clipped = d[:,end_mask_dur:-end_mask_dur]
-        probs_clipped = probs[:,end_mask_dur:-end_mask_dur]
-
-        regression_clipped = regression[:,end_mask_dur:-end_mask_dur]
-        r_clipped = r[:,end_mask_dur:-end_mask_dur]
-
-        class_logits_clipped = class_logits[:,end_mask_dur:-end_mask_dur,:]
-        y_clipped = y[:,end_mask_dur:-end_mask_dur,:]
-
-        detection_loss = detection_loss_fn(probs_clipped, d_clipped, pos_loss_weight = args.pos_loss_weight)
-        reg_loss = reg_loss_fn(regression_clipped, r_clipped, d_clipped, y_clipped)
-        class_loss = class_loss_fn(class_logits_clipped, y_clipped, d_clipped)
-
-        loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss
+
+        # lf expects ground-truth targets before predictions in its (y, y_preds) slots
+        detection_loss, reg_loss, class_loss = lf(d, probs, r, regression, y, class_logits, args=args, reg_loss_fn=reg_loss_fn)
+        rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_y, rev_class_logits, args=args, reg_loss_fn=reg_loss_fn)
+        normal_loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss
+        rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss
+        loss = (normal_loss + rev_loss)/2
         train_loss += loss.item()
+        rev_train_loss += rev_loss.item()
+        normal_train_loss += normal_loss.item()
+        normal_losses.append(normal_loss.item())
+        rev_losses.append(rev_loss.item())
         losses.append(loss.item())
         detection_losses.append(detection_loss.item())
         regression_losses.append(args.lamb * reg_loss.item())
         class_losses.append(args.rho * class_loss.item())
-
+        rev_detection_losses.append(rev_detection_loss.item())
+        rev_regression_losses.append(args.lamb * rev_reg_loss.item())
+        rev_class_losses.append(args.rho * rev_class_loss.item())
+
         # Backpropagation
         optimizer.zero_grad()
         loss.backward()
-
+
         optimizer.step()
         if i > 10:
-            data_iterator.set_description(f"Loss {np.mean(losses[-10:]):.7f}, Detection Loss {np.mean(detection_losses[-10:]):.7f}, Regression Loss {np.mean(regression_losses[-10:]):.7f}, Classification Loss {np.mean(class_losses[-10:]):.7f}")
-
+            data_iterator.set_description(f"loss {np.mean(losses[-10:]):.6f}, det {np.mean(detection_losses[-10:]):.6f}, reg {np.mean(regression_losses[-10:]):.6f}, class {np.mean(class_losses[-10:]):.6f} revloss {np.mean(rev_losses[-10:]):.6f}, revdet {np.mean(rev_detection_losses[-10:]):.6f}, revreg 
{np.mean(rev_regression_losses[-10:]):.6f}, revclass {np.mean(rev_class_losses[-10:]):.6f}") + + if args.is_test and i == 15: break + train_loss = train_loss / num_batches_seen evals['loss'] = float(train_loss) - + print(f"Epoch {t} | Train loss: {train_loss:1.3f}") return model, evals - + def val_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args): model.eval() - + manifest = predict_and_generate_manifest(model, dataloader, args, verbose = False) - e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) - + e, _, rev_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) + summary = e['summary'] - + evals = {k:[] for k in ['precision','recall','f1']} + rev_evals = {k:[] for k in ['precision','recall','f1']} for k in ['precision','recall','f1']: for l in args.label_set: - m = summary[l][k] + m = e['summary'][l][k] + rev_m = rev_e['summary'][l][k] evals[k].append(m) + rev_evals[k].append(rev_m) evals[k] = float(np.mean(evals[k])) - - print(f"Epoch {t} | Val scores @{args.model_selection_iou}IoU: Precision: {evals['precision']:1.3f} Recall: {evals['recall']:1.3f} F1: {evals['f1']:1.3f}") - return evals + rev_evals[k] = float(np.mean(rev_evals[k])) + + print(f"Epoch {t} | Val scores @{args.model_selection_iou}IoU: prec: {evals['precision']:1.3f} rec: {evals['recall']:1.3f} F1: {evals['f1']:1.3f} rev_prec: {rev_evals['precision']:1.3f} rev_rec: {rev_evals['recall']:1.3f} rev_F1: {rev_evals['f1']:1.3f}") + return evals, rev_evals def modified_focal_loss(pred, gt, pos_loss_weight = 1): # Modified from https://github.com/xingyizhou/CenterNet/blob/2b7692c377c6686fb35e473dac2de6105eed62c6/src/lib/models/losses.py - ''' + ''' pred [batch, time,] gt [batch, time,] - ''' - + ''' + pos_inds = gt.eq(1).float() neg_inds = gt.lt(1).float() @@ -217,48 +263,48 @@ def modified_focal_loss(pred, gt, pos_loss_weight = 1): pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds * pos_loss_weight neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds - + loss = -1.*(neg_loss + pos_loss) - + loss = loss.mean() return loss - - + + def masked_reg_loss(regression, r, d, y, class_weights = None): # regression, r (Tensor): [batch, time,] # r (Tensor) : [batch, time,], float tensor # d (Tensor) : [batch, time,], float tensor # y (Tensor) : [batch, time, n_classes] # class_weights (Tensor) : [n_classes,] - + reg_loss = F.l1_loss(regression, r, reduction='none') mask = d.eq(1).float() - + reg_loss = reg_loss * mask - + if class_weights is not None: y = rearrange(y, 'b t c -> b c t') high_prob = torch.amax(y, dim = 1) knowns = high_prob.eq(1).float() unknowns = high_prob.lt(1).float() - + reg_loss_unknowns = reg_loss * unknowns - + class_weights = torch.reshape(class_weights, (1, -1, 1)) class_weights = y * class_weights class_weights = torch.amax(class_weights, dim = 1) - + reg_loss_knowns = reg_loss * knowns * class_weights - + reg_loss = reg_loss_unknowns + reg_loss_knowns - + reg_loss = torch.sum(reg_loss) n_pos = mask.sum() - + if n_pos>0: reg_loss = reg_loss / n_pos - + return reg_loss def masked_classification_loss(class_logits, y, d, class_weights = None): @@ -266,41 +312,41 @@ def masked_classification_loss(class_logits, y, d, class_weights = None): # y (Tensor): [batch, 
time,n_classes] # d (Tensor) : [batch, time,], float tensor # class_weight : [n_classes,], float tensor - + class_logits = rearrange(class_logits, 'b t c -> b c t') y = rearrange(y, 'b t c -> b c t') - + high_prob = torch.amax(y, dim = 1) knowns = high_prob.eq(1).float() unknowns = high_prob.lt(1).float() - + mask = d.eq(1).float() # mask out time steps where no event is present - + known_class_loss = F.cross_entropy(class_logits, y, weight=class_weights, reduction='none') known_class_loss = known_class_loss * mask * knowns known_class_loss = torch.sum(known_class_loss) - + unknown_class_loss = F.cross_entropy(class_logits, y, weight=None, reduction='none') unknown_class_loss = unknown_class_loss * mask * unknowns unknown_class_loss = torch.sum(unknown_class_loss) - + class_loss = known_class_loss + unknown_class_loss n_pos = mask.sum() - + if n_pos>0: class_loss = class_loss / n_pos - + return class_loss - + def get_class_loss_fn(args): dataloader_temp = get_train_dataloader(args, random_seed_shift = 0) class_proportions = dataloader_temp.dataset.get_class_proportions() class_weights = 1. / (class_proportions + 1e-6) - + class_weights = (1. / (np.mean(class_weights) + 1e-6)) * class_weights # normalize so average weight = 1 - + print(f"Using class weights {class_weights}") - + class_weights = torch.Tensor(class_weights).to(device) return partial(masked_classification_loss, class_weights = class_weights) @@ -308,9 +354,9 @@ def get_reg_loss_fn(args): dataloader_temp = get_train_dataloader(args, random_seed_shift = 0) class_proportions = dataloader_temp.dataset.get_class_proportions() class_weights = 1. / (class_proportions + 1e-6) - + class_weights = (1. / (np.mean(class_weights) + 1e-6)) * class_weights # normalize so average weight = 1 - + class_weights = torch.Tensor(class_weights).to(device) return partial(masked_reg_loss, class_weights = class_weights) - \ No newline at end of file + diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index ee60a62..a81700a 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -13,36 +13,36 @@ def train_model(args): args = parse_args(args) set_seed(args.seed) - + experiment_dir = os.path.join(args.project_dir, args.name) setattr(args, 'experiment_dir', str(experiment_dir)) if not os.path.exists(args.experiment_dir): os.makedirs(args.experiment_dir) - + experiment_output_dir = os.path.join(experiment_dir, "outputs") setattr(args, 'experiment_output_dir', experiment_output_dir) if not os.path.exists(args.experiment_output_dir): os.makedirs(args.experiment_output_dir) - + save_params(args) if hasattr(args,'stereo') and args.stereo: model = DetectionModelStereo(args) else: model = DetectionModel(args) - + ## Training - trained_model = train(model, args) - + trained_model = train(model, args) + ## Evaluation test_dataloader = get_test_dataloader(args) - + manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) - + for iou in [0.2, 0.5, 0.8]: for class_threshold in [0.0, 0.5, 0.95]: - evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) + metrics, conf_mat, rev_metrics, rev_conf_mat = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) if __name__ == "__main__": train_model(sys.argv[1:]) - + # python main.py --name=debug --lr=0.0001 --n-epochs=6 --clip-duration=4 
--batch-size=100 --omit-empty-clip-prob=0.5 --clip-hop=2

From 059d0474f48542e550671233fda0b640b9a67691 Mon Sep 17 00:00:00 2001
From: Louis Mahon
Date: Tue, 9 Apr 2024 16:58:07 +0100
Subject: [PATCH 02/11] implement combining both directions; seems to be
 working well on MT

---
 voxaboxen/evaluation/evaluation.py  | 64 ++++++++++++++++++++++++++---
 voxaboxen/evaluation/raven_utils.py | 62 ++++++++++++++--------------
 voxaboxen/training/train.py         | 32 ++++++++-------
 voxaboxen/training/train_model.py   |  6 ++-
 4 files changed, 111 insertions(+), 53 deletions(-)

diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py
index 6b3691f..cd00ea7 100644
--- a/voxaboxen/evaluation/evaluation.py
+++ b/voxaboxen/evaluation/evaluation.py
@@ -12,6 +12,7 @@
 from voxaboxen.evaluation.raven_utils import Clip
 from voxaboxen.model.model import rms_and_mixup
 from voxaboxen.evaluation.nms import nms, soft_nms
+plt.switch_backend('agg')

 device = "cuda" if torch.cuda.is_available() else "cpu"

@@ -408,6 +409,7 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True):
     fns = []
     predictions_fps = []
     rev_predictions_fps = []
+    comb_predictions_fps = []
     annotations_fps = []

     for fn in dataloader_dict:
@@ -415,34 +417,79 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True):

         predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, is_rev=False, verbose=verbose)
         rev_predictions_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifications, fn, args, is_rev=True, verbose=verbose)
+        comb_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn)

         annotations_fp = dataloader_dict[fn].dataset.annot_fp

         fns.append(fn)
         predictions_fps.append(predictions_fp)
         rev_predictions_fps.append(rev_predictions_fp)
+        comb_predictions_fps.append(comb_predictions_fp)
         annotations_fps.append(annotations_fp)

-    manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'annotations_fp' : annotations_fps})
+    manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'annotations_fp' : annotations_fps})
     return manifest

-def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0):
+def combine_fwd_bck_preds(target_dir, fn):
+    # match forward and backward boxes at IoU 0.5; matched pairs keep the forward onset
+    # and backward offset, and detection probs combine as 1-(1-p_fwd)*(1-p_bck)
+    fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}.txt')
+    bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-rev.txt')
+    comb_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-comb.txt')
+    fwd_preds = pd.read_csv(fwd_preds_fp, sep='\t')
+    bck_preds = pd.read_csv(bck_preds_fp, sep='\t')
+
+    c = Clip()
+    c.load_annotations(fwd_preds_fp)
+    c.load_predictions(bck_preds_fp)
+    c.compute_matching(IoU_minimum=0.5)
+    match_preds_list = []
+    for fp, bp in c.matching:
+        match_pred = fwd_preds.loc[fp].copy()
+        bck_pred = bck_preds.iloc[bp]
+        bp_end_time = bck_pred['End Time (s)']
+        match_pred['End Time (s)'] = bp_end_time
+        match_pred['Detection Prob'] = 1 - (1-match_pred['Detection Prob'])*(1-bck_pred['Detection Prob'])
+        match_preds_list.append(match_pred)
+
+    match_preds = pd.DataFrame(match_preds_list)
+    # Now include the union of all that weren't matched
+    fwd_matched_idxs = [m[0] for m in c.matching]
+    bck_matched_idxs = [m[1] for m in c.matching]
+    fwd_unmatched = select_from_neg_idxs(fwd_preds, fwd_matched_idxs)
+    
+ #comb_preds = fwd_preds.copy() + match_preds_list = [] + for fp, bp in c.matching: + match_pred = fwd_preds.loc[fp].copy() + bck_pred = bck_preds.iloc[bp] + bp_end_time = bck_pred['End Time (s)'] + match_pred['End Time (s)'] = bp_end_time + match_pred['Detection Prob'] = 1 - (1-match_pred['Detection Prob'])*(1-bck_pred['Detection Prob']) + match_preds_list.append(match_pred) + + match_preds = pd.DataFrame(match_preds_list) + # Now include the union of all that weren't matched + fwd_matched_idxs = [m[0] for m in c.matching] + bck_matched_idxs = [m[1] for m in c.matching] + fwd_unmatched = select_from_neg_idxs(fwd_preds, fwd_matched_idxs) + bck_unmatched = select_from_neg_idxs(bck_preds, bck_matched_idxs) + comb_preds = pd.concat([match_preds, fwd_unmatched, bck_unmatched]) + assert len(comb_preds) == len(fwd_preds) + len(bck_preds) - len(c.matching) + comb_preds = comb_preds.sort_values('Begin Time (s)') + comb_preds.index = list(range(len(comb_preds))) + + comb_preds.to_csv(comb_preds_fp, sep='\t', index=False) + return comb_preds_fp + +def select_from_neg_idxs(df, neg_idxs): + bool_mask = [i not in neg_idxs for i in range(len(df))] + return df.loc[bool_mask] +def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0): metrics = {} confusion_matrix = {} rev_metrics = {} rev_confusion_matrix = {} + comb_metrics = {} + comb_confusion_matrix = {} for i, row in manifest.iterrows(): fn = row['filename'] predictions_fp = row['predictions_fp'] rev_predictions_fp = row['rev_predictions_fp'] + comb_predictions_fp = row['comb_predictions_fp'] annotations_fp = row['annotations_fp'] metrics[fn] = get_metrics(predictions_fp, annotations_fp, args, iou, class_threshold) - rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) confusion_matrix[fn], confusion_matrix_labels = get_confusion_matrix(predictions_fp, annotations_fp, args, iou, class_threshold) + rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) + comb_metrics[fn] = get_metrics(comb_predictions_fp, annotations_fp, args, iou, class_threshold) + comb_confusion_matrix[fn], comb_confusion_matrix_labels = get_confusion_matrix(comb_predictions_fp, annotations_fp, args, iou, class_threshold) if output_dir is not None: if not os.path.exists(output_dir): @@ -457,6 +504,10 @@ def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, cla rev_metrics['summary'] = rev_summary rev_macro = macro_metrics(rev_summary) rev_metrics['macro'] = rev_macro + comb_summary = summarize_metrics(comb_metrics) + comb_metrics['summary'] = comb_summary + comb_macro = macro_metrics(comb_summary) + comb_metrics['macro'] = comb_macro if output_dir is not None: metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') with open(metrics_fp, 'w') as f: @@ -465,7 +516,8 @@ def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, cla # summarize and save confusion matrix confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) - if output_dir is not None: - plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") + comb_confusion_matrix_summary, comb_confusion_matrix_labels = summarize_confusion_matrix(comb_confusion_matrix, comb_confusion_matrix_labels) + #if output_dir is not None: + #plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") - return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary + return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary, comb_metrics, comb_confusion_matrix_summary
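Note on the 'Detection Prob' fusion in combine_fwd_bck_preds above: it is a noisy-OR rule, which treats the forward and backward detectors as independent, so a matched event is scored as missed only if both directions miss it. A minimal standalone sketch:

def noisy_or(p_fwd, p_bck):
    # probability that at least one of two independent detectors fires
    return 1 - (1 - p_fwd) * (1 - p_bck)

print(noisy_or(0.6, 0.5))  # 0.8 -- the combined score is never lower than either input

diff --git a/voxaboxen/evaluation/raven_utils.py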
b/voxaboxen/evaluation/raven_utils.py index 7178452..af4fea6 100644 --- a/voxaboxen/evaluation/raven_utils.py +++ b/voxaboxen/evaluation/raven_utils.py @@ -20,58 +20,58 @@ def __init__(self, label_set = None, unknown_label = None): self.matched_predictions = None self.label_set = label_set self.unknown_label = unknown_label - + def load_selection_table(self, fp, view = None, label_mapping = None): # view (str) : If applicable, Waveform or Spectrogram to avoid double counting # label_mapping : dict {old label : new label}. If not None, will drop annotations not in keys of label_mapping - - + + annotations = pd.read_csv(fp, delimiter = '\t') if view is None and 'View' in annotations: views = annotations['View'].unique() if len(views)>1: warnings.warn(f"I found more than one view in selection table. To avoid double counting, pass view as a parameter. Views found: {view}") - + if view is not None: annotations = annotations[annotations['View'].str.contains('Waveform')].reset_index() - + if label_mapping is not None: annotations['Annotation'] = annotations['Annotation'].map(label_mapping) annotations = annotations[~pd.isnull(annotations['Annotation'])] - + return annotations - + def load_audio(self, fp): self.samples, self.sr = librosa.load(fp, sr = None) self.duration = len(self.samples) / self.sr - + def play_audio(self, start_sec, end_sec): start_sample = int(self.sr * start_sec) end_sample = int(self.sr *end_sec) display(ipd.Audio(self.samples[start_sample:end_sample], rate = self.sr)) - + def load_annotations(self, fp, view = None, label_mapping = None): self.annotations = self.load_selection_table(fp, view = view, label_mapping = label_mapping) self.annotations['index'] = self.annotations.index - + def threshold_class_predictions(self, class_threshold): # If class probability is below a threshold, switch label to unknown - + assert self.unknown_label is not None for i in self.predictions.index: if self.predictions.loc[i, 'Class Prob'] < class_threshold: - self.predictions.at[i, 'Annotation'] = self.unknown_label - + self.predictions.at[i, 'Annotation'] = self.unknown_label + def refine_annotations(self): print("Not implemented! Could implement refining annotations by SNR to remove quiet vocs") - + def refine_predictions(self): print("Not implemented! 
Could implement refining predictions by SNR to remove quiet vocs") - + def load_predictions(self, fp, view = None, label_mapping = None): self.predictions = self.load_selection_table(fp, view = view, label_mapping = label_mapping) self.predictions['index'] = self.predictions.index - + def compute_matching(self, IoU_minimum = 0.5): # Bipartite graph matching between predictions and annotations # Maximizes the number of matchings with IoU > IoU_minimum @@ -81,15 +81,15 @@ def compute_matching(self, IoU_minimum = 0.5): self.matching = metrics.match_events(ref, est, min_iou=IoU_minimum, method="fast") self.matched_annotations = [p[0] for p in self.matching] self.matched_predictions = [p[1] for p in self.matching] - - def evaluate(self): - + + def evaluate(self): + if self.label_set is None: TP = len(self.matching) FP = len(self.predictions) - TP FN = len(self.annotations) - TP return {'all' : {'TP' : TP, 'FP' : FP, 'FN' : FN}} - + else: out = {label : {'TP':0, 'FP':0, 'FN' : 0} for label in self.label_set} pred_label = np.array(self.predictions['Annotation']) @@ -97,22 +97,22 @@ def evaluate(self): for p in self.matching: annotation = annot_label[p[0]] prediction = pred_label[p[1]] - + if self.unknown_label is not None and prediction == self.unknown_label: pass # treat predicted unknowns as no predictions for these metrics elif annotation == prediction: out[annotation]['TP'] += 1 elif self.unknown_label is not None and annotation == self.unknown_label: out[prediction]['FP'] -= 1 #adjust FP for unknown labels - + for label in self.label_set: n_annot = int((annot_label == label).sum()) n_pred = int((pred_label == label).sum()) out[label]['FP'] = out[label]['FP'] + n_pred - out[label]['TP'] out[label]['FN'] = out[label]['FN'] + n_annot - out[label]['TP'] - + return out - + def confusion_matrix(self): if self.label_set is None: return None @@ -125,10 +125,10 @@ def confusion_matrix(self): confusion_matrix = np.zeros((confusion_matrix_size, confusion_matrix_size)) cm_nobox_idx = confusion_matrix_labels.index('None') - + pred_label = np.array(self.predictions['Annotation']) annot_label = np.array(self.annotations['Annotation']) - + for p in self.matching: annotation = annot_label[p[0]] prediction = pred_label[p[1]] @@ -139,21 +139,21 @@ def confusion_matrix(self): for label in confusion_matrix_labels: if label == 'None': continue - + # count false positive and false negative detections, regardless of class cm_label_idx = confusion_matrix_labels.index(label) - + #fp n_pred = int((pred_label == label).sum()) n_positive_detections_row = confusion_matrix.sum(1)[cm_label_idx] n_false_detections = n_pred - n_positive_detections_row confusion_matrix[cm_label_idx, cm_nobox_idx] = n_false_detections - + #fn n_annot = int((annot_label == label).sum()) n_positive_detections_col = confusion_matrix.sum(0)[cm_label_idx] n_missed_detections = n_annot - n_positive_detections_col confusion_matrix[cm_nobox_idx, cm_label_idx] = n_missed_detections - + return confusion_matrix, confusion_matrix_labels - \ No newline at end of file + diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index 6a230c4..79520d4 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -63,10 +63,10 @@ def train(model, args): yaml.dump(train_evals_by_epoch, f) if use_val: - val_eval, rev_eval = val_epoch(model, t, val_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args) + eval_scores, rev_eval_scores, comb_eval_scores = val_epoch(model, t, val_dataloader, args) # TODO: maybe plot rev-evals - 
val_evals.append(val_eval.copy()) - plot_eval(train_evals, learning_rates, args, val_evals = val_evals) + val_evals.append(comb_eval_scores.copy()) + plot_eval(train_evals, learning_rates, args, val_evals=val_evals) val_evals_by_epoch = {i : e for i, e in enumerate(val_evals)} val_evals_fp = os.path.join(args.experiment_dir, "val_history.yaml") @@ -77,7 +77,7 @@ def train(model, args): scheduler.step() if use_val and args.early_stopping: - current_f1 = val_eval['f1'] + current_f1 = comb_eval_scores['f1'] if current_f1 > best_f1: print('found new best model') best_f1 = current_f1 @@ -93,7 +93,7 @@ def train(model, args): torch.save( checkpoint_dict, - os.path.join(args.experiment_dir, f"model.pt"), + os.path.join(args.experiment_dir, "model.pt"), ) else: @@ -108,18 +108,18 @@ def train(model, args): torch.save( checkpoint_dict, - os.path.join(args.experiment_dir, f"model.pt"), + os.path.join(args.experiment_dir, "model.pt"), ) print("Done!") - cp = torch.load(os.path.join(args.experiment_dir, f"model.pt")) + cp = torch.load(os.path.join(args.experiment_dir, "model.pt")) model.load_state_dict(cp["model_state_dict"]) # resave validation with best model if use_val: - val_epoch(model, t+1, val_dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args) + val_epoch(model, t+1, val_dataloader, args) return model @@ -215,7 +215,7 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss optimizer.step() if i > 10: - data_iterator.set_description(f"loss {np.mean(losses[-10:]):.6f}, det {np.mean(detection_losses[-10:]):.6f}, reg {np.mean(regression_losses[-10:]):.6f}, class {np.mean(class_losses[-10:]):.6f} revloss {np.mean(rev_losses[-10:]):.6f}, revdet {np.mean(rev_detection_losses[-10:]):.6f}, revreg {np.mean(rev_regression_losses[-10:]):.6f}, revclass {np.mean(rev_class_losses[-10:]):.6f}") + data_iterator.set_description(f"loss {np.mean(losses[-10:]):.5f}, det {np.mean(detection_losses[-10:]):.5f}, reg {np.mean(regression_losses[-10:]):.5f}, class {np.mean(class_losses[-10:]):.5f} revloss {np.mean(rev_losses[-10:]):.5f}, revdet {np.mean(rev_detection_losses[-10:]):.5f}, revreg {np.mean(rev_regression_losses[-10:]):.5f}, revclass {np.mean(rev_class_losses[-10:]):.5f}") if args.is_test and i == 15: break @@ -225,27 +225,29 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss print(f"Epoch {t} | Train loss: {train_loss:1.3f}") return model, evals -def val_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, args): +def val_epoch(model, t, dataloader, args): model.eval() manifest = predict_and_generate_manifest(model, dataloader, args, verbose = False) - e, _, rev_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) - - summary = e['summary'] + e, _, rev_e, _, comb_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) evals = {k:[] for k in ['precision','recall','f1']} rev_evals = {k:[] for k in ['precision','recall','f1']} + comb_evals = {k:[] for k in ['precision','recall','f1']} for k in ['precision','recall','f1']: for l in args.label_set: m = e['summary'][l][k] rev_m = rev_e['summary'][l][k] + comb_m = comb_e['summary'][l][k] evals[k].append(m) rev_evals[k].append(rev_m) + comb_evals[k].append(comb_m) evals[k] = 
float(np.mean(evals[k])) rev_evals[k] = float(np.mean(rev_evals[k])) + comb_evals[k] = float(np.mean(comb_evals[k])) - print(f"Epoch {t} | Val scores @{args.model_selection_iou}IoU: prec: {evals['precision']:1.3f} rec: {evals['recall']:1.3f} F1: {evals['f1']:1.3f} rev_prec: {rev_evals['precision']:1.3f} rev_rec: {rev_evals['recall']:1.3f} rev_F1: {rev_evals['f1']:1.3f}") - return evals, rev_evals + print(f"Epoch {t} | val@{args.model_selection_iou}IoU: prec: {evals['precision']:1.3f} rec: {evals['recall']:1.3f} F1: {evals['f1']:1.3f} revprec: {rev_evals['precision']:1.3f} revrec: {rev_evals['recall']:1.3f} revF1: {rev_evals['f1']:1.3f} combprec: {comb_evals['precision']:1.3f} combrec: {comb_evals['recall']:1.3f} combF1: {comb_evals['f1']:1.3f}") + return evals, rev_evals, comb_evals def modified_focal_loss(pred, gt, pos_loss_weight = 1): # Modified from https://github.com/xingyizhou/CenterNet/blob/2b7692c377c6686fb35e473dac2de6105eed62c6/src/lib/models/losses.py diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index a81700a..0a40039 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -40,7 +40,11 @@ def train_model(args): for iou in [0.2, 0.5, 0.8]: for class_threshold in [0.0, 0.5, 0.95]: - metrics, conf_mat, rev_metrics, rev_conf_mat = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) + metrics, conf_mat, rev_metrics, rev_conf_mat, comb_metrics, comb_conf_mat = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) + print(f'IOU: {iou} class_thresh: {class_threshold}') + print('Fwd:', metrics['summary']) + print('Bck:', rev_metrics['summary']) + print('Comb:', comb_metrics['summary'], '\n') if __name__ == "__main__": train_model(sys.argv[1:]) From 289aa3e97830cb7d28cd7c3b4d1452a918e006f6 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Tue, 9 Apr 2024 18:17:09 +0100 Subject: [PATCH 03/11] add comb_thresh, but haven't tested yet that it's doing exactly what it should --- voxaboxen/evaluation/evaluation.py | 5 +++-- voxaboxen/training/params.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index cd00ea7..c176c76 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -417,7 +417,7 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, is_rev=False, verbose=verbose) rev_predictions_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifications, fn, args, is_rev=True, verbose=verbose) - comb_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn) + comb_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn, discard_threshold=args.comb_threshold) annotations_fp = dataloader_dict[fn].dataset.annot_fp @@ -430,7 +430,7 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'annotations_fp' : annotations_fps}) return manifest -def combine_fwd_bck_preds(target_dir, fn): +def combine_fwd_bck_preds(target_dir, fn, discard_threshold): 
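    # Match forward and backward boxes by IoU; fuse each matched pair (onset from the forward box, offset from the backward box, noisy-OR of the two probs); keep unmatched boxes from both directions; finally drop combined detections whose probability is below discard_threshold.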
fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}.txt') bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-rev.txt') comb_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-comb.txt') @@ -459,6 +459,7 @@ def combine_fwd_bck_preds(target_dir, fn): bck_unmatched = select_from_neg_idxs(bck_preds, bck_matched_idxs) comb_preds = pd.concat([match_preds, fwd_unmatched, bck_unmatched]) assert len(comb_preds) == len(fwd_preds) + len(bck_preds) - len(c.matching) + comb_preds = comb_preds.loc[comb_preds['Detection Prob']>discard_threshold] comb_preds.sort_values('Begin Time (s)') comb_preds.index = list(range(len(comb_preds))) diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index 6af6ccd..a4775df 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -32,6 +32,7 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--previous-checkpoint-fp', type=str, default=None, help="path to checkpoint of previously trained detection model") parser.add_argument('--aves-url', type=str, default = "https://storage.googleapis.com/esp-public-files/ported_aves/aves-base-bio.torchaudio.pt") parser.add_argument('--stereo', action='store_true', help="If passed, will process stereo data as stereo") + parser.add_argument('--comb-threshold', type=float, default=0.75, help="discard combined detections whose prob is below this threshold") # Training parser.add_argument('--batch-size', type=int, default=32) From ca0622d444983de05a0147fbe45a7a83192c1931 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Sun, 14 Apr 2024 12:59:57 +0100 Subject: [PATCH 04/11] track F1 of matched-preds, and refactor to loop through different pred types --- voxaboxen/evaluation/evaluation.py | 117 ++++++++++++++++------------- voxaboxen/training/train.py | 54 +++++++------ voxaboxen/training/train_model.py | 10 +-- 3 files changed, 103 insertions(+), 78 deletions(-) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index c176c76..cb09eac 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -144,6 +144,9 @@ def generate_predictions(model, single_clip_dataloader, args, verbose = True): all_rev_regressions.append(rev_regression) all_rev_classifs.append(rev_classif) + if args.is_test and i==15: + break + all_detections = torch.cat(all_detections) all_regressions = torch.cat(all_regressions) all_classifs = torch.cat(all_classifs) @@ -219,14 +222,16 @@ def generate_features(model, single_clip_dataloader, args, verbose = True): return all_features.detach().cpu().numpy() -#def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=True, target_dir=None, det_threshold=0.5, classif_threshold=0): -def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=True, target_dir=None, classif_threshold=0): +#def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, det_threshold=0.5, classif_threshold=0): +def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, classif_threshold=0): if target_dir is None: target_dir = args.experiment_output_dir - if is_rev: - fn += '-rev' + if is_bck: + fn += '-bck' + else: + fn += '-fwd' # Debugging # # target_fp = os.path.join(target_dir, f"dets_{fn}.npy") @@ -266,7 +271,7 @@ def export_to_selection_table(dets, regs, classifs, fn, args, is_rev, verbose=Tr pred_sr = args.sr // (args.scale_factor * args.prediction_scale_factor) - bboxes, det_probs, class_idxs, 
class_probs = pred2bbox(det_peaks, det_probs, durations, class_idxs, class_probs, pred_sr, is_rev) + bboxes, det_probs, class_idxs, class_probs = pred2bbox(det_peaks, det_probs, durations, class_idxs, class_probs, pred_sr, is_bck) if args.nms == "soft_nms": bboxes, det_probs, class_idxs, class_probs = soft_nms(bboxes, det_probs, class_idxs, class_probs, sigma=args.soft_nms_sigma, thresh=args.detection_threshold) @@ -407,33 +412,36 @@ def summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels): def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fns = [] - predictions_fps = [] - rev_predictions_fps = [] + fwd_predictions_fps = [] + bck_predictions_fps = [] comb_predictions_fps = [] + match_predictions_fps = [] annotations_fps = [] for fn in dataloader_dict: - detections, regressions, classifications, rev_detections, rev_regressions, rev_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) + fwd_detections, fwd_regressions, fwd_classifications, bck_detections, bck_regressions, bck_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) - predictions_fp = export_to_selection_table(detections, regressions, classifications, fn, args, is_rev=False, verbose=verbose) - rev_predictions_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifications, fn, args, is_rev=True, verbose=verbose) - comb_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn, discard_threshold=args.comb_threshold) + fwd_predictions_fp = export_to_selection_table(fwd_detections, fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose) + bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose) + comb_predictions_fp, match_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn, discard_threshold=args.comb_threshold) annotations_fp = dataloader_dict[fn].dataset.annot_fp fns.append(fn) - predictions_fps.append(predictions_fp) - rev_predictions_fps.append(rev_predictions_fp) + fwd_predictions_fps.append(fwd_predictions_fp) + bck_predictions_fps.append(bck_predictions_fp) comb_predictions_fps.append(comb_predictions_fp) + match_predictions_fps.append(match_predictions_fp) annotations_fps.append(annotations_fp) - manifest = pd.DataFrame({'filename' : fns, 'predictions_fp' : predictions_fps, 'rev_predictions_fp' : rev_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'annotations_fp' : annotations_fps}) + manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'match_predictions_fp' : match_predictions_fps, 'annotations_fp' : annotations_fps}) return manifest def combine_fwd_bck_preds(target_dir, fn, discard_threshold): - fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}.txt') - bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-rev.txt') + fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-fwd.txt') + bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-bck.txt') comb_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-comb.txt') + match_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-match.txt') fwd_preds = pd.read_csv(fwd_preds_fp, sep='\t') bck_preds = pd.read_csv(bck_preds_fp, sep='\t') @@ -451,7 +459,7 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): match_pred['Detection Prob'] = 
1 - (1-match_pred['Detection Prob'])*(1-bck_pred['Detection Prob']) match_preds_list.append(match_pred) - match_preds = pd.DataFrame(match_preds_list) + match_preds = pd.DataFrame(match_preds_list, columns=fwd_preds.columns) # Now include the union of all that weren't matched fwd_matched_idxs = [m[0] for m in c.matching] bck_matched_idxs = [m[1] for m in c.matching] @@ -464,61 +472,68 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): comb_preds.index = list(range(len(comb_preds))) comb_preds.to_csv(comb_preds_fp, sep='\t', index=False) - return comb_preds_fp + match_preds.to_csv(match_preds_fp, sep='\t', index=False) + return comb_preds_fp, match_preds_fp def select_from_neg_idxs(df, neg_idxs): bool_mask = [i not in neg_idxs for i in range(len(df))] return df.loc[bool_mask] def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0): - metrics = {} - confusion_matrix = {} - rev_metrics = {} - rev_confusion_matrix = {} - comb_metrics = {} - comb_confusion_matrix = {} + pred_types = ('fwd', 'bck', 'comb', 'match') + metrics = {p:{} for p in pred_types} + conf_mats = {p:{} for p in pred_types} + #conf_mat_labels = {p:{} for p in pred_types} + conf_mat_labels = {} + #rev_metrics = {} + #rev_confusion_matrix = {} + #comb_metrics = {} + #comb_confusion_matrix = {} for i, row in manifest.iterrows(): fn = row['filename'] - predictions_fp = row['predictions_fp'] - rev_predictions_fp = row['rev_predictions_fp'] - comb_predictions_fp = row['comb_predictions_fp'] - annotations_fp = row['annotations_fp'] - - metrics[fn] = get_metrics(predictions_fp, annotations_fp, args, iou, class_threshold) - confusion_matrix[fn], confusion_matrix_labels = get_confusion_matrix(predictions_fp, annotations_fp, args, iou, class_threshold) - rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) - rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) - comb_metrics[fn] = get_metrics(comb_predictions_fp, annotations_fp, args, iou, class_threshold) - comb_confusion_matrix[fn], comb_confusion_matrix_labels = get_confusion_matrix(comb_predictions_fp, annotations_fp, args, iou, class_threshold) + annots_fp = row['annotations_fp'] + for pred_type in pred_types: + preds_fp = row[f'{pred_type}_predictions_fp'] + metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) + conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) + #rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) + #rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) + #comb_metrics[fn] = get_metrics(comb_predictions_fp, annotations_fp, args, iou, class_threshold) + #comb_confusion_matrix[fn], comb_confusion_matrix_labels = get_confusion_matrix(comb_predictions_fp, annotations_fp, args, iou, class_threshold) if output_dir is not None: if not os.path.exists(output_dir): os.makedirs(output_dir) # summarize and save metrics - summary = summarize_metrics(metrics) - metrics['summary'] = summary - macro = macro_metrics(summary) - metrics['macro'] = macro - rev_summary = summarize_metrics(rev_metrics) - rev_metrics['summary'] = rev_summary - rev_macro = macro_metrics(rev_summary) - rev_metrics['macro'] = rev_macro - comb_summary = summarize_metrics(comb_metrics) - 
comb_metrics['summary'] = comb_summary - comb_macro = macro_metrics(comb_summary) - comb_metrics['macro'] = comb_macro + conf_mat_summaries = {} + for pred_type in ('fwd', 'bck', 'comb', 'match'): + summary = summarize_metrics(metrics[pred_type]) + metrics[pred_type]['summary'] = summary + metrics[pred_type]['macro'] = macro_metrics(summary) + conf_mat_summaries[pred_type], confusion_matrix_labels = summarize_confusion_matrix(conf_mats[pred_type], conf_mat_labels[pred_type]) + #macro = macro_metrics(summary) + #metrics['macro'] = macro + #rev_summary = summarize_metrics(rev_metrics) + #rev_metrics['summary'] = rev_summary + #rev_macro = macro_metrics(rev_summary) + #rev_metrics['macro'] = rev_macro + #comb_summary = summarize_metrics(comb_metrics) + #comb_metrics['summary'] = comb_summary + #comb_macro = macro_metrics(comb_summary) + #comb_metrics['macro'] = comb_macro if output_dir is not None: metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') with open(metrics_fp, 'w') as f: yaml.dump(metrics, f) # summarize and save confusion matrix - confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) - rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) - comb_confusion_matrix_summary, comb_confusion_matrix_labels = summarize_confusion_matrix(comb_confusion_matrix, comb_confusion_matrix_labels) + #confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) + #rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) + #comb_confusion_matrix_summary, comb_confusion_matrix_labels = summarize_confusion_matrix(comb_confusion_matrix, comb_confusion_matrix_labels) #if output_dir is not None: #plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") - return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary, comb_metrics, comb_confusion_matrix_summary + #return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary, comb_metrics, comb_confusion_matrix_summary + return metrics, conf_mat_summaries diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index 79520d4..6fe1a19 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -63,9 +63,11 @@ def train(model, args): yaml.dump(train_evals_by_epoch, f) if use_val: - eval_scores, rev_eval_scores, comb_eval_scores = val_epoch(model, t, val_dataloader, args) + #eval_scores, rev_eval_scores, comb_eval_scores = val_epoch(model, t, val_dataloader, args) + eval_scores = val_epoch(model, t, val_dataloader, args) + #for pt,pt_es in eval_scores.items(): # TODO: maybe plot rev-evals - val_evals.append(comb_eval_scores.copy()) + val_evals.append(eval_scores['comb'].copy()) plot_eval(train_evals, learning_rates, args, val_evals=val_evals) val_evals_by_epoch = {i : e for i, e in enumerate(val_evals)} @@ -77,7 +79,7 @@ def train(model, args): scheduler.step() if use_val and args.early_stopping: - current_f1 = comb_eval_scores['f1'] + current_f1 = eval_scores['comb']['f1'] if current_f1 > best_f1: print('found new best model') best_f1 = current_f1 @@ -229,25 +231,33 @@ def val_epoch(model, t, dataloader, args): model.eval() manifest = 
predict_and_generate_manifest(model, dataloader, args, verbose = False) - e, _, rev_e, _, comb_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) - - evals = {k:[] for k in ['precision','recall','f1']} - rev_evals = {k:[] for k in ['precision','recall','f1']} - comb_evals = {k:[] for k in ['precision','recall','f1']} - for k in ['precision','recall','f1']: - for l in args.label_set: - m = e['summary'][l][k] - rev_m = rev_e['summary'][l][k] - comb_m = comb_e['summary'][l][k] - evals[k].append(m) - rev_evals[k].append(rev_m) - comb_evals[k].append(comb_m) - evals[k] = float(np.mean(evals[k])) - rev_evals[k] = float(np.mean(rev_evals[k])) - comb_evals[k] = float(np.mean(comb_evals[k])) - - print(f"Epoch {t} | val@{args.model_selection_iou}IoU: prec: {evals['precision']:1.3f} rec: {evals['recall']:1.3f} F1: {evals['f1']:1.3f} revprec: {rev_evals['precision']:1.3f} revrec: {rev_evals['recall']:1.3f} revF1: {rev_evals['f1']:1.3f} combprec: {comb_evals['precision']:1.3f} combrec: {comb_evals['recall']:1.3f} combF1: {comb_evals['f1']:1.3f}") - return evals, rev_evals, comb_evals + #e, _, rev_e, _, comb_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) + e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) + + print(f"Epoch {t} | val@{args.model_selection_iou}IoU:") + evals = {} + for pt in e.keys(): + evals[pt] = {k:[] for k in ['precision','recall','f1']} + #evals = {k:[] for k in ['precision','recall','f1']} + #rev_evals = {k:[] for k in ['precision','recall','f1']} + #comb_evals = {k:[] for k in ['precision','recall','f1']} + for k in ['precision','recall','f1']: + for l in args.label_set: + m = e[pt]['summary'][l][k] + #rev_m = rev_e['summary'][l][k] + #comb_m = comb_e['summary'][l][k] + evals[pt][k].append(m) + #rev_evals[k].append(rev_m) + #comb_evals[k].append(comb_m) + evals[pt][k] = float(np.mean(evals[pt][k])) + #rev_evals[k] = float(np.mean(rev_evals[k])) + #comb_evals[k] = float(np.mean(comb_evals[k])) + + print(f"{pt}prec: {evals[pt]['precision']:1.3f} {pt}rec: {evals[pt]['recall']:1.3f} {pt}F1: {evals[pt]['f1']:1.3f}", end=' ') + #revprec: {rev_evals['precision']:1.3f} revrec: {rev_evals['recall']:1.3f} revF1: {rev_evals['f1']:1.3f} combprec: {comb_evals['precision']:1.3f} combrec: {comb_evals['recall']:1.3f} combF1: {comb_evals['f1']:1.3f}") + #return evals, rev_evals, comb_evals + print() + return evals def modified_focal_loss(pred, gt, pos_loss_weight = 1): # Modified from https://github.com/xingyizhou/CenterNet/blob/2b7692c377c6686fb35e473dac2de6105eed62c6/src/lib/models/losses.py diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index 0a40039..62e87c7 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -38,13 +38,13 @@ def train_model(args): manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) + class_threshes = [0] if len(args.label_set)==1 else [0.0, 0.5, 0.95] for iou in [0.2, 0.5, 0.8]: - for class_threshold in [0.0, 0.5, 0.95]: - metrics, conf_mat, rev_metrics, rev_conf_mat, comb_metrics, comb_conf_mat = 
evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) + for class_threshold in class_threshes: + metrics, conf_mats = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) print(f'IOU: {iou} class_thresh: {class_threshold}') - print('Fwd:', metrics['summary']) - print('Bck:', rev_metrics['summary']) - print('Comb:', comb_metrics['summary'], '\n') + for pred_type in metrics.keys(): + print(f'{pred_type}:', {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()}) if __name__ == "__main__": train_model(sys.argv[1:]) From 84994c43e9f411566794b2966669ad7d60545b26 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Fri, 19 Apr 2024 19:42:30 +0100 Subject: [PATCH 05/11] simple grid search on MT --- MT-grid-search.sh | 12 ++++++++++++ read_grid_search.py | 26 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 MT-grid-search.sh create mode 100644 read_grid_search.py diff --git a/MT-grid-search.sh b/MT-grid-search.sh new file mode 100644 index 0000000..b8a95cd --- /dev/null +++ b/MT-grid-search.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +for detthresh in 0.55; do +#for detthresh in 0.4; do + for combiouthresh in 0.5 0.55 0.6; do + #for combiouthresh in 0.4; do + for combdiscardthresh in 0.8 0.85 0.9; do + combdiscardthresh2=$(echo ${combdiscardthresh}-0.075 | bc -l) + (trap 'kill 0' SIGINT; python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=bidirectional-${detthresh}-${combiouthresh}-${combdiscardthresh} --lr=.00005 --batch-size=4 --n-epochs 20 --detection-threshold ${detthresh} --comb-iou-threshold ${combiouthresh} --comb-discard-threshold ${combdiscardthresh} & python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=bidirectional-${detthresh}-${combiouthresh}-${combdiscardthresh2} --lr=.00005 --batch-size=4 --n-epochs 20 --detection-threshold ${detthresh} --comb-iou-threshold ${combiouthresh} --comb-discard-threshold ${combdiscardthresh2} & wait) + done + done +done diff --git a/read_grid_search.py b/read_grid_search.py new file mode 100644 index 0000000..de3001e --- /dev/null +++ b/read_grid_search.py @@ -0,0 +1,26 @@ +import yaml +from os.path import join +import os +import pandas as pd + + +all_results = {} +for detthresh in (0.4, 0.55, 0.7, 0.85): + all_results[detthresh] = {} + for combiouthresh in (0.4, 0.55, 0.7, 0.85): + all_results[detthresh][combiouthresh] = {} + for combdiscardthresh in (0.4, 0.55, 0.7, 0.85): + all_results[detthresh][combiouthresh][combdiscardthresh] = {} + resdir = f'projects/MT_experiment/bidirectional-{detthresh}-{combiouthresh}-{combdiscardthresh}/test_results' + if not os.path.exists(resdir): + continue + results = {} + for iouf1 in (2,5,8): + with open(join(resdir, f'metrics_iou_0.{iouf1}_class_threshold_0.yaml')) as f: + exp_results = yaml.safe_load(f) + for pred_type in ('fwd','bck','comb','match'): + results[f'testiou{iouf1}-{pred_type}'] = exp_results[pred_type]['macro']['f1'] + all_results[detthresh][combiouthresh][combdiscardthresh] = results + +breakpoint() + From d7b5b5c9ec3f345237067cd6768b842785b9e34c Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Fri, 19 Apr 2024 19:44:04 +0100 Subject: [PATCH 06/11] get running on powdermill, refactor to use threshes in evaluate manifest only --- 
datasets/powdermill/process_powdermill.py | 49 ++++----- voxaboxen/evaluation/evaluation.py | 116 +++++++++------------- voxaboxen/project/project_setup.py | 19 ++-- voxaboxen/training/params.py | 5 +- voxaboxen/training/train.py | 35 ++----- voxaboxen/training/train_model.py | 28 ++++-- 6 files changed, 115 insertions(+), 137 deletions(-) diff --git a/datasets/powdermill/process_powdermill.py b/datasets/powdermill/process_powdermill.py index 2a52b7d..ed47e56 100644 --- a/datasets/powdermill/process_powdermill.py +++ b/datasets/powdermill/process_powdermill.py @@ -11,86 +11,86 @@ def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') raw_annot_dir = os.path.join(raw_data_dir, 'selection_tables') audio_dir = os.path.join(cwd, 'soundscape_data') - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') for d in [formatted_data_dir, formatted_annot_dir]: if not os.path.exists(d): os.makedirs(d) - + raw_annotations_fps = sorted(glob(os.path.join(cwd, 'raw', 'selection_tables', '*.txt'))) - + train_proportion = 0.6 val_proportion = 0.2 - + train_audio_fps = [] val_audio_fps = [] test_audio_fps = [] - + for i in range(1,5): audio_fps = sorted(glob(os.path.join(audio_dir, f"Recording_{i}_*"))) n_train = int(train_proportion * len(audio_fps)) n_val = int(val_proportion * len(audio_fps)) - + train_audio_fps.extend(audio_fps[:n_train]) val_audio_fps.extend(audio_fps[n_train:n_train+n_val]) test_audio_fps.extend(audio_fps[n_train+n_val:]) - - + + train_fns = [os.path.basename(x).split('.')[0] for x in train_audio_fps] val_fns = [os.path.basename(x).split('.')[0] for x in val_audio_fps] test_fns = [os.path.basename(x).split('.')[0] for x in test_audio_fps] - + train_annot_fps = [] val_annot_fps = [] test_annot_fps = [] - + for fn, audio_fp in zip(train_fns, train_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) train_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(val_fns, val_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) val_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(test_fns, test_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) test_annot_fps.append(annot_fp) - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) @@ 
-100,6 +100,7 @@ def main(): test_info_df = pd.DataFrame({'fn' : test_fns, 'audio_fp' : test_audio_fps, 'selection_table_fp' : test_annot_fps}) test_info_fp = os.path.join(formatted_data_dir, 'test_info.csv') test_info_df.to_csv(test_info_fp, index = False) + breakpoint() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index cb09eac..d0d8e81 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -414,8 +414,8 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fns = [] fwd_predictions_fps = [] bck_predictions_fps = [] - comb_predictions_fps = [] - match_predictions_fps = [] + #comb_predictions_fps = [] + #match_predictions_fps = [] annotations_fps = [] for fn in dataloader_dict: @@ -423,21 +423,53 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fwd_predictions_fp = export_to_selection_table(fwd_detections, fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose) bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose) - comb_predictions_fp, match_predictions_fp = combine_fwd_bck_preds(args.experiment_output_dir, fn, discard_threshold=args.comb_threshold) - annotations_fp = dataloader_dict[fn].dataset.annot_fp fns.append(fn) fwd_predictions_fps.append(fwd_predictions_fp) bck_predictions_fps.append(bck_predictions_fp) - comb_predictions_fps.append(comb_predictions_fp) - match_predictions_fps.append(match_predictions_fp) annotations_fps.append(annotations_fp) - manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'match_predictions_fp' : match_predictions_fps, 'annotations_fp' : annotations_fps}) + #manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'match_predictions_fp' : match_predictions_fps, 'annotations_fp' : annotations_fps}) + manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'annotations_fp' : annotations_fps}) return manifest -def combine_fwd_bck_preds(target_dir, fn, discard_threshold): +def evaluate_based_on_manifest(manifest, args, output_dir, iou, class_threshold, comb_discard_threshold): + pred_types = ('fwd', 'bck', 'comb', 'match') + metrics = {p:{} for p in pred_types} + conf_mats = {p:{} for p in pred_types} + conf_mat_labels = {} + + for i, row in manifest.iterrows(): + fn = row['filename'] + annots_fp = row['annotations_fp'] + row['comb_predictions_fp'], row['match_predictions_fp'] = combine_fwd_bck_preds(args.experiment_output_dir, fn, comb_iou_threshold=args.comb_iou_threshold, comb_discard_threshold=comb_discard_threshold) + + for pred_type in pred_types: + preds_fp = row[f'{pred_type}_predictions_fp'] + metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) + conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) + + if output_dir is not None: + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + # summarize and save metrics + conf_mat_summaries = {} + for pred_type in ('fwd', 'bck', 'comb', 'match'): + summary = 
summarize_metrics(metrics[pred_type]) + metrics[pred_type]['summary'] = summary + metrics[pred_type]['macro'] = macro_metrics(summary) + conf_mat_summaries[pred_type], confusion_matrix_labels = summarize_confusion_matrix(conf_mats[pred_type], conf_mat_labels[pred_type]) + if output_dir is not None: + metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') + with open(metrics_fp, 'w') as f: + yaml.dump(metrics, f) + + # summarize and save confusion matrix + return metrics, conf_mat_summaries + +def combine_fwd_bck_preds(target_dir, fn, comb_iou_threshold, comb_discard_threshold): fwd_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-fwd.txt') bck_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-bck.txt') comb_preds_fp = os.path.join(target_dir, f'peaks_pred_{fn}-comb.txt') @@ -448,8 +480,7 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): c = Clip() c.load_annotations(fwd_preds_fp) c.load_predictions(bck_preds_fp) - c.compute_matching(IoU_minimum=0.5) - #comb_preds = fwd_preds.copy() + c.compute_matching(IoU_minimum=comb_iou_threshold) match_preds_list = [] for fp, bp in c.matching: match_pred = fwd_preds.loc[fp].copy() @@ -465,9 +496,11 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): bck_matched_idxs = [m[1] for m in c.matching] fwd_unmatched = select_from_neg_idxs(fwd_preds, fwd_matched_idxs) bck_unmatched = select_from_neg_idxs(bck_preds, bck_matched_idxs) - comb_preds = pd.concat([match_preds, fwd_unmatched, bck_unmatched]) + to_concat = [x for x in [match_preds, fwd_unmatched, bck_unmatched] if x.shape[0]>0] + comb_preds = pd.concat(to_concat) if len(to_concat)>0 else fwd_preds assert len(comb_preds) == len(fwd_preds) + len(bck_preds) - len(c.matching) - comb_preds = comb_preds.loc[comb_preds['Detection Prob']>discard_threshold] + comb_preds = comb_preds.loc[comb_preds['Detection Prob']>comb_discard_threshold] + #print(f'Using combdiscardthresh {args.comb_discard_threshold} and comb_preds has shape {comb_preds.shape}') comb_preds.sort_values('Begin Time (s)') comb_preds.index = list(range(len(comb_preds))) @@ -478,62 +511,3 @@ def combine_fwd_bck_preds(target_dir, fn, discard_threshold): def select_from_neg_idxs(df, neg_idxs): bool_mask = [i not in neg_idxs for i in range(len(df))] return df.loc[bool_mask] - -def evaluate_based_on_manifest(manifest, args, output_dir = None, iou = 0.5, class_threshold = 0.0): - pred_types = ('fwd', 'bck', 'comb', 'match') - metrics = {p:{} for p in pred_types} - conf_mats = {p:{} for p in pred_types} - #conf_mat_labels = {p:{} for p in pred_types} - conf_mat_labels = {} - #rev_metrics = {} - #rev_confusion_matrix = {} - #comb_metrics = {} - #comb_confusion_matrix = {} - - for i, row in manifest.iterrows(): - fn = row['filename'] - annots_fp = row['annotations_fp'] - for pred_type in pred_types: - preds_fp = row[f'{pred_type}_predictions_fp'] - metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) - conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) - #rev_metrics[fn] = get_metrics(rev_predictions_fp, annotations_fp, args, iou, class_threshold) - #rev_confusion_matrix[fn], rev_confusion_matrix_labels = get_confusion_matrix(rev_predictions_fp, annotations_fp, args, iou, class_threshold) - #comb_metrics[fn] = get_metrics(comb_predictions_fp, annotations_fp, args, iou, class_threshold) - #comb_confusion_matrix[fn], comb_confusion_matrix_labels = 
get_confusion_matrix(comb_predictions_fp, annotations_fp, args, iou, class_threshold) - - if output_dir is not None: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - # summarize and save metrics - conf_mat_summaries = {} - for pred_type in ('fwd', 'bck', 'comb', 'match'): - summary = summarize_metrics(metrics[pred_type]) - metrics[pred_type]['summary'] = summary - metrics[pred_type]['macro'] = macro_metrics(summary) - conf_mat_summaries[pred_type], confusion_matrix_labels = summarize_confusion_matrix(conf_mats[pred_type], conf_mat_labels[pred_type]) - #macro = macro_metrics(summary) - #metrics['macro'] = macro - #rev_summary = summarize_metrics(rev_metrics) - #rev_metrics['summary'] = rev_summary - #rev_macro = macro_metrics(rev_summary) - #rev_metrics['macro'] = rev_macro - #comb_summary = summarize_metrics(comb_metrics) - #comb_metrics['summary'] = comb_summary - #comb_macro = macro_metrics(comb_summary) - #comb_metrics['macro'] = comb_macro - if output_dir is not None: - metrics_fp = os.path.join(output_dir, f'metrics_iou_{iou}_class_threshold_{class_threshold}.yaml') - with open(metrics_fp, 'w') as f: - yaml.dump(metrics, f) - - # summarize and save confusion matrix - #confusion_matrix_summary, confusion_matrix_labels = summarize_confusion_matrix(confusion_matrix, confusion_matrix_labels) - #rev_confusion_matrix_summary, rev_confusion_matrix_labels = summarize_confusion_matrix(rev_confusion_matrix, rev_confusion_matrix_labels) - #comb_confusion_matrix_summary, comb_confusion_matrix_labels = summarize_confusion_matrix(comb_confusion_matrix, comb_confusion_matrix_labels) - #if output_dir is not None: - #plot_confusion_matrix(confusion_matrix_summary.astype(int), confusion_matrix_labels, output_dir, name=f"cm_iou_{iou}_class_threshold_{class_threshold}") - - #return metrics, confusion_matrix_summary, rev_metrics, rev_confusion_matrix_summary, comb_metrics, comb_confusion_matrix_summary - return metrics, conf_mat_summaries diff --git a/voxaboxen/project/project_setup.py b/voxaboxen/project/project_setup.py index 09f9a8d..cd38721 100644 --- a/voxaboxen/project/project_setup.py +++ b/voxaboxen/project/project_setup.py @@ -7,37 +7,38 @@ def project_setup(args): args = parse_project_args(args) - + if not os.path.exists(args.project_dir): os.makedirs(args.project_dir) - + all_annots = [] for info_fp in [args.train_info_fp, args.val_info_fp, args.test_info_fp]: if info_fp is None: continue - + info = pd.read_csv(info_fp) annot_fps = list(info['selection_table_fp']) - + for annot_fp in annot_fps: if annot_fp != "None": selection_table = pd.read_csv(annot_fp, delimiter = '\t') annots = list(selection_table['Annotation']) all_annots.extend(annots) - + label_set = sorted(set(all_annots)) label_mapping = {x : x for x in label_set} + breakpoint() label_mapping['Unknown'] = 'Unknown' unknown_label = 'Unknown' - + if unknown_label in label_set: label_set.remove(unknown_label) - + setattr(args, "label_set", label_set) setattr(args, "label_mapping", label_mapping) setattr(args, "unknown_label", unknown_label) - + save_params(args) if __name__ == "__main__": - project_setup(sys.argv[1:]) \ No newline at end of file + project_setup(sys.argv[1:]) diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index a4775df..1e580db 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -13,6 +13,7 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--name', type = str, required=True) parser.add_argument('--seed', type=int, default=0) 
parser.add_argument('--is_test', '-t', action='store_true') + parser.add_argument('--overwrite', action='store_true') # Data parser.add_argument('--project-config-fp', type = str, required=True) @@ -32,7 +33,9 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--previous-checkpoint-fp', type=str, default=None, help="path to checkpoint of previously trained detection model") parser.add_argument('--aves-url', type=str, default = "https://storage.googleapis.com/esp-public-files/ported_aves/aves-base-bio.torchaudio.pt") parser.add_argument('--stereo', action='store_true', help="If passed, will process stereo data as stereo") - parser.add_argument('--comb-threshold', type=float, default=0.75, help="discard combined detections whose prob is below this threshold") + parser.add_argument('--comb-discard-threshold', type=float, default=0.75, help="discard combined detections whose prob is below this threshold") + parser.add_argument('--comb-iou-threshold', type=float, default=0.5, help="minimum IoU for a forward and a backward detection to be matched when combining") + parser.add_argument('--reload-from', type=str, help="name of an experiment in project_dir whose saved model.pt is loaded before training") # Training parser.add_argument('--batch-size', type=int, default=32) diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index 6fe1a19..f4cfc28 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -63,10 +63,8 @@ def train(model, args): yaml.dump(train_evals_by_epoch, f) if use_val: - #eval_scores, rev_eval_scores, comb_eval_scores = val_epoch(model, t, val_dataloader, args) eval_scores = val_epoch(model, t, val_dataloader, args) - #for pt,pt_es in eval_scores.items(): - # TODO: maybe plot rev-evals + # TODO: maybe plot evals for other pred_types val_evals.append(eval_scores['comb'].copy()) plot_eval(train_evals, learning_rates, args, val_evals=val_evals) @@ -125,7 +123,7 @@ def train(model, args): return model -def lf(dets, det_preds, regs, reg_preds, y, y_preds, args, reg_loss_fn): +def lf(dets, det_preds, regs, reg_preds, y, y_preds, args, reg_loss_fn, class_loss_fn): end_mask_perc = args.end_mask_perc end_mask_dur = int(det_preds.size(1)*end_mask_perc) @@ -135,13 +133,15 @@ def lf(dets, det_preds, regs, reg_preds, y, y_preds, args, reg_loss_fn): reg_preds_clipped = reg_preds[:,end_mask_dur:-end_mask_dur] regs_clipped = regs[:,end_mask_dur:-end_mask_dur] - #y_preds_clipped = y_preds[:,end_mask_dur:-end_mask_dur,:] y_clipped = y[:,end_mask_dur:-end_mask_dur,:] detection_loss = modified_focal_loss(det_preds_clipped, dets_clipped, pos_loss_weight=args.pos_loss_weight) reg_loss = reg_loss_fn(reg_preds_clipped, regs_clipped, dets_clipped, y_clipped) - #class_loss = class_loss_fn(y_preds_clipped, y_clipped, dets_clipped) - class_loss = torch.tensor(0) + if len(args.label_set)==1: + class_loss = torch.tensor(0) + else: + y_preds_clipped = y_preds[:,end_mask_dur:-end_mask_dur,:] + class_loss = class_loss_fn(y_preds_clipped, y_clipped, dets_clipped) return detection_loss, reg_loss, class_loss def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss_fn, optimizer, args): @@ -174,8 +174,8 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss # We mask out loss from each end of the clip, so the model isn't forced to learn to detect events that are partially cut off.
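# e.g. with end_mask_perc = 0.1 and 400 prediction frames, lf excludes the first and last 40 frames from the detection, regression, and classification losses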
# This does not affect inference, because during inference we overlap clips at 50% - detection_loss, reg_loss, class_loss = lf(d, probs, r, regression, class_logits, y, args=args, reg_loss_fn=reg_loss_fn) - rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_class_logits, rev_y, args=args, reg_loss_fn=reg_loss_fn) + detection_loss, reg_loss, class_loss = lf(d, probs, r, regression, y, class_logits, args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) + rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_y, rev_class_logits, args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) normal_loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss loss = (normal_loss + rev_loss)/2 @@ -209,9 +209,6 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss rev_regression_losses.append(args.lamb * rev_reg_loss.item()) rev_class_losses.append(args.rho * rev_class_loss.item()) - #if i > 150: - #breakpoint() - # Backpropagation optimizer.zero_grad() loss.backward() @@ -231,31 +228,19 @@ def val_epoch(model, t, dataloader, args): model.eval() manifest = predict_and_generate_manifest(model, dataloader, args, verbose = False) - #e, _, rev_e, _, comb_e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) - e, _ = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'val_results'), iou = args.model_selection_iou, class_threshold = args.model_selection_class_threshold) + e, _ = evaluate_based_on_manifest(manifest, args, output_dir=os.path.join(args.experiment_dir, 'val_results'), iou=args.model_selection_iou, class_threshold=args.model_selection_class_threshold, comb_discard_threshold=args.comb_discard_threshold) print(f"Epoch {t} | val@{args.model_selection_iou}IoU:") evals = {} for pt in e.keys(): evals[pt] = {k:[] for k in ['precision','recall','f1']} - #evals = {k:[] for k in ['precision','recall','f1']} - #rev_evals = {k:[] for k in ['precision','recall','f1']} - #comb_evals = {k:[] for k in ['precision','recall','f1']} for k in ['precision','recall','f1']: for l in args.label_set: m = e[pt]['summary'][l][k] - #rev_m = rev_e['summary'][l][k] - #comb_m = comb_e['summary'][l][k] evals[pt][k].append(m) - #rev_evals[k].append(rev_m) - #comb_evals[k].append(comb_m) evals[pt][k] = float(np.mean(evals[pt][k])) - #rev_evals[k] = float(np.mean(rev_evals[k])) - #comb_evals[k] = float(np.mean(comb_evals[k])) print(f"{pt}prec: {evals[pt]['precision']:1.3f} {pt}rec: {evals[pt]['recall']:1.3f} {pt}F1: {evals[pt]['f1']:1.3f}", end=' ') - #revprec: {rev_evals['precision']:1.3f} revrec: {rev_evals['recall']:1.3f} revF1: {rev_evals['f1']:1.3f} combprec: {comb_evals['precision']:1.3f} combrec: {comb_evals['recall']:1.3f} combF1: {comb_evals['f1']:1.3f}") - #return evals, rev_evals, comb_evals print() return evals diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index 62e87c7..be8ee8c 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -1,4 +1,6 @@ +import pandas as pd from voxaboxen.data.data import get_test_dataloader +import torch from voxaboxen.model.model import DetectionModel, DetectionModelStereo from voxaboxen.training.train 
import train from voxaboxen.training.params import parse_args, set_seed, save_params @@ -16,7 +18,8 @@ def train_model(args): experiment_dir = os.path.join(args.project_dir, args.name) setattr(args, 'experiment_dir', str(experiment_dir)) - if not os.path.exists(args.experiment_dir): + if os.path.exists(args.experiment_dir) and not args.overwrite: + sys.exit('experiment already exists with this name') os.makedirs(args.experiment_dir) experiment_output_dir = os.path.join(experiment_dir, "outputs") @@ -30,21 +33,32 @@ def train_model(args): else: model = DetectionModel(args) + if args.reload_from is not None: + #model.load_state_dict(os.path.join(args.experiment_dir), 'model.pt') + checkpoint = torch.load(os.path.join(args.project_dir, args.reload_from, 'model.pt')) + model.load_state_dict(checkpoint['model_state_dict']) + ## Training - trained_model = train(model, args) + if args.n_epochs == 0: + trained_model = model + else: + trained_model = train(model, args) ## Evaluation test_dataloader = get_test_dataloader(args) manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) - class_threshes = [0] if len(args.label_set)==1 else [0.0, 0.5, 0.95] + #class_threshes = [0] if len(args.label_set)==1 else [0.0, 0.5, 0.95] + class_threshes = [0.0, 0.5, 0.95] for iou in [0.2, 0.5, 0.8]: for class_threshold in class_threshes: - metrics, conf_mats = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou = iou, class_threshold = class_threshold) - print(f'IOU: {iou} class_thresh: {class_threshold}') - for pred_type in metrics.keys(): - print(f'{pred_type}:', {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()}) + for comb_discard_thresh in [0.85]: + metrics, conf_mats = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=class_threshold, comb_discard_threshold=comb_discard_thresh) + print(f'IOU: {iou} class_thresh: {class_threshold} Comb discard threshold: {comb_discard_thresh}') + for pred_type in metrics.keys(): + to_print = {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()} if len(args.label_set)==1 else dict(pd.DataFrame(metrics[pred_type]['summary']).mean(axis=1).round(4)) + print(f'{pred_type}:', to_print) if __name__ == "__main__": train_model(sys.argv[1:]) From b587878d494bb94f8d1161759d87a37daf878ab9 Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Wed, 24 Apr 2024 09:31:49 +0100 Subject: [PATCH 07/11] fit comb-discard threshold on the val set after training --- datasets/BV/process_BV.py | 54 +++++++++++++++---------------- datasets/hawaii/process_hawaii.py | 41 +++++++++++------------ voxaboxen/training/train_model.py | 37 ++++++++++++++------- 3 files changed, 73 insertions(+), 59 deletions(-) diff --git a/datasets/BV/process_BV.py b/datasets/BV/process_BV.py index edcf1bf..e0b4e36 100644 --- a/datasets/BV/process_BV.py +++ b/datasets/BV/process_BV.py @@ -16,14 +16,14 @@ def process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion, audio_dur_sec = np.floor(audio_dur_samples / sr) train_audio_dur_samples = int(audio_dur_samples * train_proportion) train_audio_dur_sec = train_audio_dur_samples / sr - + val_audio_dur_samples = int(audio_dur_samples * val_proportion) val_audio_dur_sec = val_audio_dur_samples / sr - + train_audio = audio[:train_audio_dur_samples] val_audio = 
audio[train_audio_dur_samples:train_audio_dur_samples+val_audio_dur_samples] test_audio = audio[train_audio_dur_samples+val_audio_dur_samples:] - + ys = [] for i, row in annot.iterrows(): y = 'voc' @@ -33,30 +33,30 @@ def process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion, # if row[label] == 'POS': # y = label ys.append(y) - + begin_time = list(annot['Starttime']) end_time = list(annot['Endtime']) - + selection_table = pd.DataFrame({'Begin Time (s)' : begin_time, 'End Time (s)' : end_time, 'Annotation' : ys, 'Low Freq (Hz)' : [low_hz for x in begin_time], 'High Freq (Hz)' : [high_hz for x in begin_time]}).drop_duplicates() - + train_selection_table = selection_table[selection_table['End Time (s)'] < train_audio_dur_sec].copy() val_selection_table = selection_table[(selection_table['End Time (s)'] >= train_audio_dur_sec) & (selection_table['End Time (s)'] < train_audio_dur_sec + val_audio_dur_sec)].copy() val_selection_table['Begin Time (s)'] = val_selection_table['Begin Time (s)'] - train_audio_dur_sec val_selection_table['Begin Time (s)'] = val_selection_table['Begin Time (s)'].map(lambda x : max(x, 0)) val_selection_table['End Time (s)'] = val_selection_table['End Time (s)'] - train_audio_dur_sec - + test_selection_table = selection_table[selection_table['Begin Time (s)'] >= train_audio_dur_sec + val_audio_dur_sec].copy() test_selection_table['Begin Time (s)'] = test_selection_table['Begin Time (s)'] - (train_audio_dur_sec + val_audio_dur_sec) test_selection_table['Begin Time (s)'] = test_selection_table['Begin Time (s)'].map(lambda x : max(x, 0)) test_selection_table['End Time (s)'] = test_selection_table['End Time (s)'] - (train_audio_dur_sec + val_audio_dur_sec) - + return train_selection_table, train_audio, val_selection_table, val_audio, test_selection_table, test_audio -def main(): +def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_audio_dir = os.path.join(formatted_data_dir, 'audio') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') @@ -64,65 +64,65 @@ def main(): if not os.path.exists(d): os.makedirs(d) - train_proportion = 0.6 + train_proportion = 0.6 val_proportion = 0.2 - + annotation_fns = sorted(glob(os.path.join(raw_data_dir, '*.csv'))) annotation_fns = [os.path.basename(x) for x in annotation_fns] audio_fns = sorted(glob(os.path.join(raw_data_dir, '*.wav'))) audio_fns = [os.path.basename(x) for x in audio_fns] - + train_fns = [] train_audio_fps = [] train_annot_fps = [] - + val_fns = [] val_audio_fps = [] val_annot_fps = [] - + test_fns = [] test_audio_fps = [] test_annot_fps = [] - + for annot_fn, audio_fn in tqdm.tqdm(zip(annotation_fns, audio_fns)): fn = annot_fn.split('.')[0] train_fns.append(f"{fn}_train") val_fns.append(f"{fn}_val") test_fns.append(f"{fn}_test") - + annot_fp = os.path.join(raw_data_dir, annot_fn) audio_fp = os.path.join(raw_data_dir, audio_fn) - + annot = pd.read_csv(annot_fp) audio, sr = sf.read(audio_fp) - + train_selection_table, train_audio, val_selection_table, val_audio, test_selection_table, test_audio = process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion) - + train_selection_table_fn = f"{annot_fn.split('.')[0]}_train.txt" train_selection_table_fp = os.path.join(formatted_annot_dir, train_selection_table_fn) train_selection_table.to_csv(train_selection_table_fp, sep = '\t', index = False) train_annot_fps.append(train_selection_table_fp) - + train_audio_fn = 
f"{audio_fn.split('.')[0]}_train.wav" train_audio_fp = os.path.join(formatted_audio_dir, train_audio_fn) sf.write(train_audio_fp, train_audio, sr) train_audio_fps.append(train_audio_fp) - + val_selection_table_fn = f"{annot_fn.split('.')[0]}_val.txt" val_selection_table_fp = os.path.join(formatted_annot_dir, val_selection_table_fn) val_selection_table.to_csv(val_selection_table_fp, sep = '\t', index = False) val_annot_fps.append(val_selection_table_fp) - + val_audio_fn = f"{audio_fn.split('.')[0]}_val.wav" val_audio_fp = os.path.join(formatted_audio_dir, val_audio_fn) sf.write(val_audio_fp, val_audio, sr) val_audio_fps.append(val_audio_fp) - + test_selection_table_fn = f"{annot_fn.split('.')[0]}_test.txt" test_selection_table_fp = os.path.join(formatted_annot_dir, test_selection_table_fn) test_selection_table.to_csv(test_selection_table_fp, sep = '\t', index = False) test_annot_fps.append(test_selection_table_fp) - + test_audio_fn = f"{audio_fn.split('.')[0]}_test.wav" test_audio_fp = os.path.join(formatted_audio_dir, test_audio_fn) sf.write(test_audio_fp, test_audio, sr) @@ -133,7 +133,7 @@ def main(): test_audio = None val_audio = None train_audio = None - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) diff --git a/datasets/hawaii/process_hawaii.py b/datasets/hawaii/process_hawaii.py index 14060b8..d20b408 100644 --- a/datasets/hawaii/process_hawaii.py +++ b/datasets/hawaii/process_hawaii.py @@ -14,78 +14,79 @@ def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') audio_dir = os.path.join(cwd, 'soundscape_data') - + raw_annotations_fp = os.path.join(cwd, 'raw', 'annotations.csv') raw_annot_df = pd.read_csv(raw_annotations_fp) raw_annot_df['Annotation'] = raw_annot_df['Species eBird Code'] raw_annot_df = raw_annot_df.drop('Species eBird Code', axis=1) - + raw_annot_df['Begin Time (s)'] = raw_annot_df['Start Time (s)'] raw_annot_df = raw_annot_df.drop('Start Time (s)', axis=1) - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') for d in [formatted_data_dir, formatted_annot_dir]: if not os.path.exists(d): os.makedirs(d) - + train_proportion = 0.6 val_proportion = 0.2 - + train_audio_fps = [] val_audio_fps = [] test_audio_fps = [] - + for i in range(1,5): audio_fps = sorted(glob(os.path.join(audio_dir, f"*_S0{i}_*.{file_extension}"))) + #audio_fps = sorted(glob(os.path.join(audio_dir, f"*Recording_{i}_*.{file_extension}"))) n_train = int(train_proportion * len(audio_fps)) n_val = int(val_proportion * len(audio_fps)) - + train_audio_fps.extend(audio_fps[:n_train]) val_audio_fps.extend(audio_fps[n_train:n_train+n_val]) test_audio_fps.extend(audio_fps[n_train+n_val:]) - + train_fns = [os.path.basename(x).split('.')[0] for x in train_audio_fps] val_fns = [os.path.basename(x).split('.')[0] for x in val_audio_fps] test_fns = [os.path.basename(x).split('.')[0] for x in test_audio_fps] - + train_annot_fps = [] val_annot_fps = [] test_annot_fps = [] - + for fn, audio_fp in zip(train_fns, train_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) 
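(For orientation: the selection tables written by these loops are tab-separated, Raven-style text files; a hypothetical example, with invented times and species codes, might look like:)

    Begin Time (s)	End Time (s)	Annotation
    12.81	14.05	hawama
    31.20	31.92	iiwi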
train_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(val_fns, val_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) val_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(test_fns, test_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) test_annot_fps.append(annot_fp) - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index be8ee8c..8a63436 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -1,5 +1,5 @@ import pandas as pd -from voxaboxen.data.data import get_test_dataloader +from voxaboxen.data.data import get_test_dataloader, get_val_dataloader import torch from voxaboxen.model.model import DetectionModel, DetectionModelStereo from voxaboxen.training.train import train @@ -10,7 +10,13 @@ import sys import os + +def print_metrics(metrics, just_one_label): + for pred_type in metrics.keys(): + to_print = {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()} if just_one_label else dict(pd.DataFrame(metrics[pred_type]['summary']).mean(axis=1).round(4)) + print(f'{pred_type}:', to_print) def train_model(args): + ## Setup args = parse_args(args) @@ -34,7 +40,6 @@ def train_model(args): model = DetectionModel(args) if args.reload_from is not None: - #model.load_state_dict(os.path.join(args.experiment_dir), 'model.pt') checkpoint = torch.load(os.path.join(args.project_dir, args.reload_from, 'model.pt')) model.load_state_dict(checkpoint['model_state_dict']) @@ -46,19 +51,27 @@ def train_model(args): ## Evaluation test_dataloader = get_test_dataloader(args) + val_dataloader = get_val_dataloader(args) + + val_manifest = predict_and_generate_manifest(trained_model, val_dataloader, args) - manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) + best_comb_discard_thresh = -1 + best_f1 = 0 + for comb_discard_thresh in [.3,.35,.4,.45,.5,.55,.6,.65,.75,.8,.85,.9]: + val_metrics, val_conf_mats = evaluate_based_on_manifest(val_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=0.5, class_threshold=0.5, comb_discard_threshold=comb_discard_thresh) + new_f1 = val_metrics['comb']['macro']['f1'] + if new_f1 > best_f1: + best_comb_discard_thresh = comb_discard_thresh + best_f1 = new_f1 + print(f'IOU: 0.5 class_thresh: 0.5 Comb discard threshold: {comb_discard_thresh}') + print_metrics(val_metrics, just_one_label=(len(args.label_set)==1)) - #class_threshes = [0] if len(args.label_set)==1 else [0.0, 0.5, 0.95] - class_threshes = [0.0, 0.5, 0.95] + test_manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) + print(f'Using thresh: {best_comb_discard_thresh}') for iou in [0.2, 0.5, 0.8]: - for class_threshold in class_threshes: - for comb_discard_thresh in [0.85]: - 
metrics, conf_mats = evaluate_based_on_manifest(manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=class_threshold, comb_discard_threshold=comb_discard_thresh) - print(f'IOU: {iou} class_thresh: {class_threshold} Comb discard threshold: {comb_discard_thresh}') - for pred_type in metrics.keys(): - to_print = {k1:{k:round(100*v,4) for k,v in v1.items()} for k1,v1 in metrics[pred_type]['summary'].items()} if len(args.label_set)==1 else dict(pd.DataFrame(metrics[pred_type]['summary']).mean(axis=1).round(4)) - print(f'{pred_type}:', to_print) + test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=best_comb_discard_thresh) + print(f'Test with IOU{iou}') + print_metrics(test_metrics, just_one_label=(len(args.label_set)==1)) if __name__ == "__main__": train_model(sys.argv[1:]) From 84bcf19d0e94e73486a4ebb4df58bf1ab8d2250a Mon Sep 17 00:00:00 2001 From: Louis Mahon Date: Wed, 24 Apr 2024 09:58:45 +0100 Subject: [PATCH 08/11] clean up some old code --- voxaboxen/data/data.py | 3 +-- voxaboxen/evaluation/evaluation.py | 11 +++++------ voxaboxen/project/project_setup.py | 1 - voxaboxen/training/params.py | 8 ++++---- voxaboxen/training/train.py | 16 ---------------- 5 files changed, 10 insertions(+), 29 deletions(-) diff --git a/voxaboxen/data/data.py b/voxaboxen/data/data.py index 924ce86..0f360e8 100644 --- a/voxaboxen/data/data.py +++ b/voxaboxen/data/data.py @@ -238,8 +238,7 @@ def get_train_dataloader(args, random_seed_shift = 0): train_dataloader = DataLoader(train_dataset, batch_size=effective_batch_size, shuffle=True, - #num_workers=args.num_workers, - num_workers=0, + num_workers=args.num_workers, pin_memory=True, drop_last = True) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index d0d8e81..97e8082 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -222,7 +222,6 @@ def generate_features(model, single_clip_dataloader, args, verbose = True): return all_features.detach().cpu().numpy() -#def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, det_threshold=0.5, classif_threshold=0): def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, classif_threshold=0): if target_dir is None: @@ -414,8 +413,6 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fns = [] fwd_predictions_fps = [] bck_predictions_fps = [] - #comb_predictions_fps = [] - #match_predictions_fps = [] annotations_fps = [] for fn in dataloader_dict: @@ -430,7 +427,6 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): bck_predictions_fps.append(bck_predictions_fp) annotations_fps.append(annotations_fp) - #manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'comb_predictions_fp' : comb_predictions_fps, 'match_predictions_fp' : match_predictions_fps, 'annotations_fp' : annotations_fps}) manifest = pd.DataFrame({'filename' : fns, 'fwd_predictions_fp' : fwd_predictions_fps, 'bck_predictions_fp' : bck_predictions_fps, 'annotations_fp' : annotations_fps}) return manifest @@ -487,11 +483,13 @@ def combine_fwd_bck_preds(target_dir, fn, comb_iou_threshold, comb_discard_thres bck_pred = bck_preds.iloc[bp] bp_end_time = bck_pred['End Time 
(s)'] match_pred['End Time (s)'] = bp_end_time + # Sorta like assuming forward and back predictions are independent, gives a high prob for the matched predictions match_pred['Detection Prob'] = 1 - (1-match_pred['Detection Prob'])*(1-bck_pred['Detection Prob']) match_preds_list.append(match_pred) match_preds = pd.DataFrame(match_preds_list, columns=fwd_preds.columns) - # Now include the union of all that weren't matched + + # Include the union of all predictions that weren't part of the matching fwd_matched_idxs = [m[0] for m in c.matching] bck_matched_idxs = [m[1] for m in c.matching] fwd_unmatched = select_from_neg_idxs(fwd_preds, fwd_matched_idxs) @@ -499,8 +497,9 @@ def combine_fwd_bck_preds(target_dir, fn, comb_iou_threshold, comb_discard_thres to_concat = [x for x in [match_preds, fwd_unmatched, bck_unmatched] if x.shape[0]>0] comb_preds = pd.concat(to_concat) if len(to_concat)>0 else fwd_preds assert len(comb_preds) == len(fwd_preds) + len(bck_preds) - len(c.matching) + + # Finally, keep only predictions above a threshold, this will include almost all matches comb_preds = comb_preds.loc[comb_preds['Detection Prob']>comb_discard_threshold] - #print(f'Using combdiscardthresh {args.comb_discard_threshold} and comb_preds has shape {comb_preds.shape}') comb_preds.sort_values('Begin Time (s)') comb_preds.index = list(range(len(comb_preds))) diff --git a/voxaboxen/project/project_setup.py b/voxaboxen/project/project_setup.py index cd38721..48b39d0 100644 --- a/voxaboxen/project/project_setup.py +++ b/voxaboxen/project/project_setup.py @@ -27,7 +27,6 @@ def project_setup(args): label_set = sorted(set(all_annots)) label_mapping = {x : x for x in label_set} - breakpoint() label_mapping['Unknown'] = 'Unknown' unknown_label = 'Unknown' diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index 1e580db..b4846b0 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -12,8 +12,8 @@ def parse_args(args,allow_unknown=False): # General parser.add_argument('--name', type = str, required=True) parser.add_argument('--seed', type=int, default=0) - parser.add_argument('--is_test', '-t', action='store_true') - parser.add_argument('--overwrite', action='store_true') + parser.add_argument('--is_test', '-t', action='store_true', help='run a quick version for testing') + parser.add_argument('--overwrite', action='store_true', help='overwrite an experiment of the same name, if it exists') # Data parser.add_argument('--project-config-fp', type = str, required=True) @@ -33,8 +33,8 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--previous-checkpoint-fp', type=str, default=None, help="path to checkpoint of previously trained detection model") parser.add_argument('--aves-url', type=str, default = "https://storage.googleapis.com/esp-public-files/ported_aves/aves-base-bio.torchaudio.pt") parser.add_argument('--stereo', action='store_true', help="If passed, will process stereo data as stereo") - parser.add_argument('--comb-discard-threshold', type=float, default=0.75, help="discard combined detections whose prob is below this threshold") - parser.add_argument('--comb-iou-threshold', type=float, default=0.5, help="discard combined detections whose prob is below this threshold") + parser.add_argument('--comb-discard-threshold', type=float, default=0.75, help="during evaluation, discard combined detections whose prob is below this threshold") + parser.add_argument('--comb-iou-threshold', type=float, default=0.5, help="minimum iou to match a forward and 
backward prediction") parser.add_argument('--reload-from', type=str) # Training diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index f4cfc28..d7e9db8 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -179,23 +179,7 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss normal_loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss loss = (normal_loss + rev_loss)/2 - #end_mask_perc = args.end_mask_perc - #end_mask_dur = int(probs.size(1)*end_mask_perc) - #d_clipped = d[:,end_mask_dur:-end_mask_dur] - #probs_clipped = probs[:,end_mask_dur:-end_mask_dur] - - #regression_clipped = regression[:,end_mask_dur:-end_mask_dur] - #r_clipped = r[:,end_mask_dur:-end_mask_dur] - - #class_logits_clipped = class_logits[:,end_mask_dur:-end_mask_dur,:] - #y_clipped = y[:,end_mask_dur:-end_mask_dur,:] - - #detection_loss = detection_loss_fn(probs_clipped, d_clipped, pos_loss_weight = args.pos_loss_weight) - #reg_loss = reg_loss_fn(regression_clipped, r_clipped, d_clipped, y_clipped) - #class_loss = class_loss_fn(class_logits_clipped, y_clipped, d_clipped) - - #loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss train_loss += loss.item() rev_train_loss += rev_loss.item() normal_train_loss += normal_loss.item() From 0a12448538e6a198d045842136c7a580d0a65d0a Mon Sep 17 00:00:00 2001 From: Lou1sM Date: Wed, 1 May 2024 13:42:31 +0100 Subject: [PATCH 09/11] remove unused bash scripts and leftover breakpoints --- MT-grid-search.sh | 12 ----- datasets/BV/process_BV.py | 54 +++++++++++------------ datasets/hawaii/process_hawaii.py | 41 +++++++++-------- datasets/powdermill/process_powdermill.py | 49 ++++++++++---------- read_grid_search.py | 26 ----------- run.sh | 1 - voxaboxen/training/train_model.py | 2 +- 7 files changed, 72 insertions(+), 113 deletions(-) delete mode 100644 MT-grid-search.sh delete mode 100644 read_grid_search.py delete mode 100644 run.sh diff --git a/MT-grid-search.sh b/MT-grid-search.sh deleted file mode 100644 index b8a95cd..0000000 --- a/MT-grid-search.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh - -for detthresh in 0.55; do -#for detthresh in 0.4; do - for combiouthresh in 0.5 0.55 0.6; do - #for combiouthresh in 0.4; do - for combdiscardthresh in 0.8 0.85 0.9; do - combdiscardthresh2=$(echo ${combdiscardthresh}-0.075 | bc -l) - (trap 'kill 0' SIGINT; python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=bidirectional-${detthresh}-${combiouthresh}-${combdiscardthresh} --lr=.00005 --batch-size=4 --n-epochs 20 --detection-threshold ${detthresh} --comb-iou-threshold ${combiouthresh} --comb-discard-threshold ${combdiscardthresh} & python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=bidirectional-${detthresh}-${combiouthresh}-${combdiscardthresh2} --lr=.00005 --batch-size=4 --n-epochs 20 --detection-threshold ${detthresh} --comb-iou-threshold ${combiouthresh} --comb-discard-threshold ${combdiscardthresh2} & wait) - done - done -done diff --git a/datasets/BV/process_BV.py b/datasets/BV/process_BV.py index e0b4e36..edcf1bf 100644 --- a/datasets/BV/process_BV.py +++ b/datasets/BV/process_BV.py @@ -16,14 +16,14 @@ def process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion, audio_dur_sec = np.floor(audio_dur_samples / sr) train_audio_dur_samples = int(audio_dur_samples * train_proportion) 
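(A condensed sketch of the 60/20/20 proportional split that process_audio_and_annot performs on each recording, assuming audio is a 1-D numpy array; variable names here are illustrative:)

    import numpy as np

    audio = np.zeros(16000 * 60)                 # stand-in one-minute waveform at 16 kHz
    train_end = int(len(audio) * 0.6)            # train_proportion
    val_end = train_end + int(len(audio) * 0.2)  # val_proportion
    train_audio, val_audio, test_audio = np.split(audio, [train_end, val_end])
    # The selection-table times are shifted by the same offsets so that each
    # split's annotations are expressed relative to the start of its own audio.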
train_audio_dur_sec = train_audio_dur_samples / sr - + val_audio_dur_samples = int(audio_dur_samples * val_proportion) val_audio_dur_sec = val_audio_dur_samples / sr - + train_audio = audio[:train_audio_dur_samples] val_audio = audio[train_audio_dur_samples:train_audio_dur_samples+val_audio_dur_samples] test_audio = audio[train_audio_dur_samples+val_audio_dur_samples:] - + ys = [] for i, row in annot.iterrows(): y = 'voc' @@ -33,30 +33,30 @@ def process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion, # if row[label] == 'POS': # y = label ys.append(y) - + begin_time = list(annot['Starttime']) end_time = list(annot['Endtime']) - + selection_table = pd.DataFrame({'Begin Time (s)' : begin_time, 'End Time (s)' : end_time, 'Annotation' : ys, 'Low Freq (Hz)' : [low_hz for x in begin_time], 'High Freq (Hz)' : [high_hz for x in begin_time]}).drop_duplicates() - + train_selection_table = selection_table[selection_table['End Time (s)'] < train_audio_dur_sec].copy() val_selection_table = selection_table[(selection_table['End Time (s)'] >= train_audio_dur_sec) & (selection_table['End Time (s)'] < train_audio_dur_sec + val_audio_dur_sec)].copy() val_selection_table['Begin Time (s)'] = val_selection_table['Begin Time (s)'] - train_audio_dur_sec val_selection_table['Begin Time (s)'] = val_selection_table['Begin Time (s)'].map(lambda x : max(x, 0)) val_selection_table['End Time (s)'] = val_selection_table['End Time (s)'] - train_audio_dur_sec - + test_selection_table = selection_table[selection_table['Begin Time (s)'] >= train_audio_dur_sec + val_audio_dur_sec].copy() test_selection_table['Begin Time (s)'] = test_selection_table['Begin Time (s)'] - (train_audio_dur_sec + val_audio_dur_sec) test_selection_table['Begin Time (s)'] = test_selection_table['Begin Time (s)'].map(lambda x : max(x, 0)) test_selection_table['End Time (s)'] = test_selection_table['End Time (s)'] - (train_audio_dur_sec + val_audio_dur_sec) - + return train_selection_table, train_audio, val_selection_table, val_audio, test_selection_table, test_audio -def main(): +def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_audio_dir = os.path.join(formatted_data_dir, 'audio') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') @@ -64,65 +64,65 @@ def main(): if not os.path.exists(d): os.makedirs(d) - train_proportion = 0.6 + train_proportion = 0.6 val_proportion = 0.2 - + annotation_fns = sorted(glob(os.path.join(raw_data_dir, '*.csv'))) annotation_fns = [os.path.basename(x) for x in annotation_fns] audio_fns = sorted(glob(os.path.join(raw_data_dir, '*.wav'))) audio_fns = [os.path.basename(x) for x in audio_fns] - + train_fns = [] train_audio_fps = [] train_annot_fps = [] - + val_fns = [] val_audio_fps = [] val_annot_fps = [] - + test_fns = [] test_audio_fps = [] test_annot_fps = [] - + for annot_fn, audio_fn in tqdm.tqdm(zip(annotation_fns, audio_fns)): fn = annot_fn.split('.')[0] train_fns.append(f"{fn}_train") val_fns.append(f"{fn}_val") test_fns.append(f"{fn}_test") - + annot_fp = os.path.join(raw_data_dir, annot_fn) audio_fp = os.path.join(raw_data_dir, audio_fn) - + annot = pd.read_csv(annot_fp) audio, sr = sf.read(audio_fp) - + train_selection_table, train_audio, val_selection_table, val_audio, test_selection_table, test_audio = process_audio_and_annot(annot, audio, sr, train_proportion, val_proportion) - + train_selection_table_fn = f"{annot_fn.split('.')[0]}_train.txt" train_selection_table_fp = 
os.path.join(formatted_annot_dir, train_selection_table_fn) train_selection_table.to_csv(train_selection_table_fp, sep = '\t', index = False) train_annot_fps.append(train_selection_table_fp) - + train_audio_fn = f"{audio_fn.split('.')[0]}_train.wav" train_audio_fp = os.path.join(formatted_audio_dir, train_audio_fn) sf.write(train_audio_fp, train_audio, sr) train_audio_fps.append(train_audio_fp) - + val_selection_table_fn = f"{annot_fn.split('.')[0]}_val.txt" val_selection_table_fp = os.path.join(formatted_annot_dir, val_selection_table_fn) val_selection_table.to_csv(val_selection_table_fp, sep = '\t', index = False) val_annot_fps.append(val_selection_table_fp) - + val_audio_fn = f"{audio_fn.split('.')[0]}_val.wav" val_audio_fp = os.path.join(formatted_audio_dir, val_audio_fn) sf.write(val_audio_fp, val_audio, sr) val_audio_fps.append(val_audio_fp) - + test_selection_table_fn = f"{annot_fn.split('.')[0]}_test.txt" test_selection_table_fp = os.path.join(formatted_annot_dir, test_selection_table_fn) test_selection_table.to_csv(test_selection_table_fp, sep = '\t', index = False) test_annot_fps.append(test_selection_table_fp) - + test_audio_fn = f"{audio_fn.split('.')[0]}_test.wav" test_audio_fp = os.path.join(formatted_audio_dir, test_audio_fn) sf.write(test_audio_fp, test_audio, sr) @@ -133,7 +133,7 @@ def main(): test_audio = None val_audio = None train_audio = None - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) diff --git a/datasets/hawaii/process_hawaii.py b/datasets/hawaii/process_hawaii.py index d20b408..14060b8 100644 --- a/datasets/hawaii/process_hawaii.py +++ b/datasets/hawaii/process_hawaii.py @@ -14,79 +14,78 @@ def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') audio_dir = os.path.join(cwd, 'soundscape_data') - + raw_annotations_fp = os.path.join(cwd, 'raw', 'annotations.csv') raw_annot_df = pd.read_csv(raw_annotations_fp) raw_annot_df['Annotation'] = raw_annot_df['Species eBird Code'] raw_annot_df = raw_annot_df.drop('Species eBird Code', axis=1) - + raw_annot_df['Begin Time (s)'] = raw_annot_df['Start Time (s)'] raw_annot_df = raw_annot_df.drop('Start Time (s)', axis=1) - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') for d in [formatted_data_dir, formatted_annot_dir]: if not os.path.exists(d): os.makedirs(d) - + train_proportion = 0.6 val_proportion = 0.2 - + train_audio_fps = [] val_audio_fps = [] test_audio_fps = [] - + for i in range(1,5): audio_fps = sorted(glob(os.path.join(audio_dir, f"*_S0{i}_*.{file_extension}"))) - #audio_fps = sorted(glob(os.path.join(audio_dir, f"*Recording_{i}_*.{file_extension}"))) n_train = int(train_proportion * len(audio_fps)) n_val = int(val_proportion * len(audio_fps)) - + train_audio_fps.extend(audio_fps[:n_train]) val_audio_fps.extend(audio_fps[n_train:n_train+n_val]) test_audio_fps.extend(audio_fps[n_train+n_val:]) - + train_fns = [os.path.basename(x).split('.')[0] for x in train_audio_fps] val_fns = [os.path.basename(x).split('.')[0] for x in val_audio_fps] test_fns = [os.path.basename(x).split('.')[0] for x in test_audio_fps] - + train_annot_fps = [] val_annot_fps = [] test_annot_fps = [] - + for fn, audio_fp in zip(train_fns, train_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = 
sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) train_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(val_fns, val_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) val_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(test_fns, test_audio_fps): sub_annot_df = raw_annot_df[raw_annot_df['Filename'] == f'{fn}.flac'] sub_annot_df = sub_annot_df.drop('Filename', axis = 1) - + annot_fn = f"selection_table_{fn.split('.')[0]}.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + sub_annot_df.to_csv(annot_fp, sep = '\t', index = False) test_annot_fps.append(annot_fp) - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) diff --git a/datasets/powdermill/process_powdermill.py b/datasets/powdermill/process_powdermill.py index ed47e56..2a52b7d 100644 --- a/datasets/powdermill/process_powdermill.py +++ b/datasets/powdermill/process_powdermill.py @@ -11,86 +11,86 @@ def main(): cwd = os.getcwd() - + raw_data_dir = os.path.join(cwd, 'raw') raw_annot_dir = os.path.join(raw_data_dir, 'selection_tables') audio_dir = os.path.join(cwd, 'soundscape_data') - + formatted_data_dir = os.path.join(cwd, 'formatted') formatted_annot_dir = os.path.join(formatted_data_dir, 'selection_tables') for d in [formatted_data_dir, formatted_annot_dir]: if not os.path.exists(d): os.makedirs(d) - + raw_annotations_fps = sorted(glob(os.path.join(cwd, 'raw', 'selection_tables', '*.txt'))) - + train_proportion = 0.6 val_proportion = 0.2 - + train_audio_fps = [] val_audio_fps = [] test_audio_fps = [] - + for i in range(1,5): audio_fps = sorted(glob(os.path.join(audio_dir, f"Recording_{i}_*"))) n_train = int(train_proportion * len(audio_fps)) n_val = int(val_proportion * len(audio_fps)) - + train_audio_fps.extend(audio_fps[:n_train]) val_audio_fps.extend(audio_fps[n_train:n_train+n_val]) test_audio_fps.extend(audio_fps[n_train+n_val:]) - - + + train_fns = [os.path.basename(x).split('.')[0] for x in train_audio_fps] val_fns = [os.path.basename(x).split('.')[0] for x in val_audio_fps] test_fns = [os.path.basename(x).split('.')[0] for x in test_audio_fps] - + train_annot_fps = [] val_annot_fps = [] test_annot_fps = [] - + for fn, audio_fp in zip(train_fns, train_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) train_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(val_fns, val_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = 
f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) val_annot_fps.append(annot_fp) - + for fn, audio_fp in zip(test_fns, test_audio_fps): annot_fn = f"{fn}.Table.1.selections.txt" raw_annotations_fp = os.path.join(raw_annot_dir, annot_fn) - + annot_df = pd.read_csv(raw_annotations_fp, sep = '\t') annot_df['Annotation'] = annot_df['Species'] annot_df = annot_df.drop('Species', axis=1) - + annot_fn = f"{fn}.Table.1.selections.txt" annot_fp = os.path.join(formatted_annot_dir, annot_fn) - + annot_df.to_csv(annot_fp, sep = '\t', index = False) test_annot_fps.append(annot_fp) - + train_info_df = pd.DataFrame({'fn' : train_fns, 'audio_fp' : train_audio_fps, 'selection_table_fp' : train_annot_fps}) train_info_fp = os.path.join(formatted_data_dir, 'train_info.csv') train_info_df.to_csv(train_info_fp, index = False) @@ -100,7 +100,6 @@ def main(): test_info_df = pd.DataFrame({'fn' : test_fns, 'audio_fp' : test_audio_fps, 'selection_table_fp' : test_annot_fps}) test_info_fp = os.path.join(formatted_data_dir, 'test_info.csv') test_info_df.to_csv(test_info_fp, index = False) - breakpoint() if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/read_grid_search.py b/read_grid_search.py deleted file mode 100644 index de3001e..0000000 --- a/read_grid_search.py +++ /dev/null @@ -1,26 +0,0 @@ -import yaml -from os.path import join -import os -import pandas as pd - - -all_results = {} -for detthresh in (0.4, 0.55, 0.7, 0.85): - all_results[detthresh] = {} - for combiouthresh in (0.4, 0.55, 0.7, 0.85): - all_results[detthresh][combiouthresh] = {} - for combdiscardthresh in (0.4, 0.55, 0.7, 0.85): - all_results[detthresh][combiouthresh][combdiscardthresh] = {} - resdir = f'projects/MT_experiment/bidirectional-{detthresh}-{combiouthresh}-{combdiscardthresh}/test_results' - if not os.path.exists(resdir): - continue - results = {} - for iouf1 in (2,5,8): - with open(join(resdir, f'metrics_iou_0.{iouf1}_class_threshold_0.yaml')) as f: - exp_results = yaml.safe_load(f) - for pred_type in ('fwd','bck','comb','match'): - results[f'testiou{iouf1}-{pred_type}'] = exp_results[pred_type]['macro']['f1'] - all_results[detthresh][combiouthresh][combdiscardthresh] = results - -breakpoint() - diff --git a/run.sh b/run.sh deleted file mode 100644 index fc135c2..0000000 --- a/run.sh +++ /dev/null @@ -1 +0,0 @@ -python main.py train-model --project-config-fp=projects/MT_experiment/project_config.yaml --name=demo --lr=.00005 --batch-size=4 --n-epochs 4 diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index 8a63436..8e14871 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -24,7 +24,7 @@ def train_model(args): experiment_dir = os.path.join(args.project_dir, args.name) setattr(args, 'experiment_dir', str(experiment_dir)) - if os.path.exists(args.experiment_dir) and not args.overwrite: + if os.path.exists(args.experiment_dir) and not args.overwrite and args.name!='demo': sys.exit('experiment already exists with this name') os.makedirs(args.experiment_dir) From 73127332325e9d5f5b84f4f57743fef191f714d8 Mon Sep 17 00:00:00 2001 From: Lou1sM Date: Fri, 17 May 2024 17:18:29 +0100 Subject: [PATCH 10/11] pass cl-arg to run bidirectional, stereo and/or segmentation --- voxaboxen/evaluation/evaluation.py | 61 +++++++++++++++---------- voxaboxen/model/model.py | 23 ++++++++-- voxaboxen/training/params.py | 5 ++ voxaboxen/training/train.py | 73 
+++++++++++++++++------------- voxaboxen/training/train_model.py | 35 ++++++-------- 5 files changed, 116 insertions(+), 81 deletions(-) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index 97e8082..b6d5362 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -133,16 +133,18 @@ def generate_predictions(model, single_clip_dataloader, args, verbose = True): X = X.to(device = device, dtype = torch.float) X, _, _, _ = rms_and_mixup(X, None, None, None, False, args) - detection, regression, classif, rev_detection, rev_regression, rev_classif = model(X) - classif = torch.nn.functional.softmax(classif, dim=-1) - rev_classif = torch.nn.functional.softmax(rev_classif, dim=-1) - - all_detections.append(detection) - all_regressions.append(regression) - all_classifs.append(classif) - all_rev_detections.append(rev_detection) - all_rev_regressions.append(rev_regression) - all_rev_classifs.append(rev_classif) + model_outputs = model(X) + assert isinstance(model_outputs, tuple) + all_detections.append(model_outputs[0]) + all_regressions.append(model_outputs[1]) + all_classifs.append(model_outputs[2].softmax(-1)) + if model.is_bidirectional: + assert all(x is not None for x in model_outputs) + all_rev_detections.append(model_outputs[3]) + all_rev_regressions.append(model_outputs[4]) + all_rev_classifs.append(model_outputs[5].softmax(-1)) + else: + assert all(x is None for x in model_outputs[3:]) if args.is_test and i==15: break @@ -150,10 +152,10 @@ def generate_predictions(model, single_clip_dataloader, args, verbose = True): all_detections = torch.cat(all_detections) all_regressions = torch.cat(all_regressions) all_classifs = torch.cat(all_classifs) - all_rev_detections = torch.cat(all_rev_detections) - all_rev_regressions = torch.cat(all_rev_regressions) - all_rev_classifs = torch.cat(all_rev_classifs) - + if model.is_bidirectional: + all_rev_detections = torch.cat(all_rev_detections) + all_rev_regressions = torch.cat(all_rev_regressions) + all_rev_classifs = torch.cat(all_rev_classifs) ######## Todo: Need better checking that preds are the correct dur assert all_detections.size(dim=1) % 2 == 0 @@ -186,7 +188,10 @@ def assemble(d, r, c): return assembled_d.detach().cpu().numpy(), assembled_r.detach().cpu().numpy(), assembled_c.detach().cpu().numpy(), assembled_dets, assembled_regs, assembled_classifs = assemble(all_detections, all_regressions, all_classifs) - assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs = assemble(all_rev_detections, all_rev_regressions, all_rev_classifs) + if model.is_bidirectional: + assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs = assemble(all_rev_detections, all_rev_regressions, all_rev_classifs) + else: + assembled_rev_dets = assembled_rev_regs = assembled_rev_classifs = None return assembled_dets, assembled_regs, assembled_classifs, assembled_rev_dets, assembled_rev_regs, assembled_rev_classifs def generate_features(model, single_clip_dataloader, args, verbose = True): @@ -419,7 +424,12 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): fwd_detections, fwd_regressions, fwd_classifications, bck_detections, bck_regressions, bck_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) fwd_predictions_fp = export_to_selection_table(fwd_detections, fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose) - bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, 
bck_classifications, fn, args, is_bck=True, verbose=verbose) + if model.is_bidirectional: + assert all(x is not None for x in [bck_detections, bck_classifications, bck_regressions]) + bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose) + else: + assert all(x is None for x in [bck_detections, bck_classifications, bck_regressions]) + bck_predictions_fp = None annotations_fp = dataloader_dict[fn].dataset.annot_fp fns.append(fn) @@ -431,20 +441,21 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): return manifest def evaluate_based_on_manifest(manifest, args, output_dir, iou, class_threshold, comb_discard_threshold): - pred_types = ('fwd', 'bck', 'comb', 'match') + pred_types = ('fwd', 'bck', 'comb', 'match') if args.bidirectional else ('fwd',) metrics = {p:{} for p in pred_types} conf_mats = {p:{} for p in pred_types} conf_mat_labels = {} for i, row in manifest.iterrows(): - fn = row['filename'] - annots_fp = row['annotations_fp'] - row['comb_predictions_fp'], row['match_predictions_fp'] = combine_fwd_bck_preds(args.experiment_output_dir, fn, comb_iou_threshold=args.comb_iou_threshold, comb_discard_threshold=comb_discard_threshold) + fn = row['filename'] + annots_fp = row['annotations_fp'] + if args.bidirectional: + row['comb_predictions_fp'], row['match_predictions_fp'] = combine_fwd_bck_preds(args.experiment_output_dir, fn, comb_iou_threshold=args.comb_iou_threshold, comb_discard_threshold=comb_discard_threshold) - for pred_type in pred_types: - preds_fp = row[f'{pred_type}_predictions_fp'] - metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) - conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) + for pred_type in pred_types: + preds_fp = row[f'{pred_type}_predictions_fp'] + metrics[pred_type][fn] = get_metrics(preds_fp, annots_fp, args, iou, class_threshold) + conf_mats[pred_type][fn], conf_mat_labels[pred_type] = get_confusion_matrix(preds_fp, annots_fp, args, iou, class_threshold) if output_dir is not None: if not os.path.exists(output_dir): @@ -452,7 +463,7 @@ def evaluate_based_on_manifest(manifest, args, output_dir, iou, class_threshold, # summarize and save metrics conf_mat_summaries = {} - for pred_type in ('fwd', 'bck', 'comb', 'match'): + for pred_type in pred_types: summary = summarize_metrics(metrics[pred_type]) metrics[pred_type]['summary'] = summary metrics[pred_type]['macro'] = macro_metrics(summary) diff --git a/voxaboxen/model/model.py b/voxaboxen/model/model.py index 5fe4439..5f8a6a8 100644 --- a/voxaboxen/model/model.py +++ b/voxaboxen/model/model.py @@ -44,11 +44,17 @@ def unfreeze(self): class DetectionModel(nn.Module): def __init__(self, args, embedding_dim=768): super().__init__() + self.is_bidirectional = args.bidirectional + self.is_stereo = args.stereo + self.is_segmentation = args.segmentation + if self.is_stereo: + embedding_dim *= 2 self.encoder = AvesEmbedding(args) self.args = args aves_sr = args.sr // args.scale_factor self.detection_head = DetectionHead(args, embedding_dim = embedding_dim) - self.rev_detection_head = DetectionHead(args, embedding_dim = embedding_dim) + if self.is_bidirectional: + self.rev_detection_head = DetectionHead(args, embedding_dim = embedding_dim) def forward(self, x): """ @@ -64,7 +70,12 @@ def forward(self, x): expected_dur_output = math.ceil(x.size(1)/self.args.scale_factor) x = 
x-torch.mean(x,axis=1,keepdim=True) - feats = self.encoder(x) + if self.is_stereo: + feats0 = self.encoder(x[:,0,:]) + feats1 = self.encoder(x[:,1,:]) + feats = torch.cat([feats0,feats1],dim=-1) + else: + feats = self.encoder(x) #aves may be off by 1 sample from expected pad = expected_dur_output - feats.size(1) @@ -73,8 +84,11 @@ def forward(self, x): detection_logits, regression, class_logits = self.detection_head(feats) detection_probs = torch.sigmoid(detection_logits) - rev_detection_logits, rev_regression, rev_class_logits = self.rev_detection_head(feats) - rev_detection_probs = torch.sigmoid(rev_detection_logits) + if self.is_bidirectional: + rev_detection_logits, rev_regression, rev_class_logits = self.rev_detection_head(feats) + rev_detection_probs = torch.sigmoid(rev_detection_logits) + else: + rev_detection_probs = rev_regression = rev_class_logits = None return detection_probs, regression, class_logits, rev_detection_probs, rev_regression, rev_class_logits @@ -161,7 +175,6 @@ def forward(self, x): return detection_probs, regression, class_logits - def rms_and_mixup(X, d, r, y, train, args): if args.rms_norm: ms = torch.mean(X ** 2, dim = -1, keepdim = True) diff --git a/voxaboxen/training/params.py b/voxaboxen/training/params.py index b4846b0..3f0e239 100644 --- a/voxaboxen/training/params.py +++ b/voxaboxen/training/params.py @@ -23,6 +23,8 @@ def parse_args(args,allow_unknown=False): parser.add_argument('--num-workers', type=int, default=8) # Model + parser.add_argument('--bidirectional', action='store_true', help="train and inference in both directions and combine results") + parser.add_argument('--segmentation', action='store_true') parser.add_argument('--sr', type=int, default=16000) parser.add_argument('--scale-factor', type=int, default = 320, help = "downscaling performed by aves") parser.add_argument('--aves-model-weight-fp', type=str, default = "weights/aves-base-bio.torchaudio.pt") @@ -77,6 +79,9 @@ def parse_args(args,allow_unknown=False): if args.clip_hop is None: setattr(args, "clip_hop", args.clip_duration/2) + if args.bidirectional and args.segmentation: + raise ValueError("bidirectional and segmentation settings are not currently compatible") + if allow_unknown: return args, remaining else: diff --git a/voxaboxen/training/train.py b/voxaboxen/training/train.py index d7e9db8..adb7a90 100644 --- a/voxaboxen/training/train.py +++ b/voxaboxen/training/train.py @@ -65,7 +65,7 @@ def train(model, args): if use_val: eval_scores = val_epoch(model, t, val_dataloader, args) # TODO: maybe plot evals for other pred_types - val_evals.append(eval_scores['comb'].copy()) + val_evals.append(eval_scores['fwd'].copy()) plot_eval(train_evals, learning_rates, args, val_evals=val_evals) val_evals_by_epoch = {i : e for i, e in enumerate(val_evals)} @@ -77,7 +77,7 @@ def train(model, args): scheduler.step() if use_val and args.early_stopping: - current_f1 = eval_scores['comb']['f1'] + current_f1 = eval_scores['comb']['f1'] if model.is_bidirectional else eval_scores['fwd']['f1'] if current_f1 > best_f1: print('found new best model') best_f1 = current_f1 @@ -119,7 +119,7 @@ def train(model, args): # resave validation with best model if use_val: - val_epoch(model, t+1, val_dataloader, args) + val_epoch(model, args.n_epochs, val_dataloader, args) return model @@ -153,52 +153,63 @@ def train_epoch(model, t, dataloader, detection_loss_fn, reg_loss_fn, class_loss evals = {} - normal_train_loss = 0; normal_losses = []; detection_losses = []; regression_losses = []; class_losses = [] + 
train_loss = 0; losses = []; detection_losses = []; regression_losses = []; class_losses = [] rev_train_loss = 0; rev_losses = []; rev_detection_losses = []; rev_regression_losses = []; rev_class_losses = [] - train_loss = 0; losses = [] + data_iterator = tqdm.tqdm(dataloader) - for i, (X, d, r, y, rev_d, rev_r, rev_y) in enumerate(data_iterator): + #for i, (X, d, r, y, rev_d, rev_r, rev_y) in enumerate(data_iterator): + for i, batch in enumerate(data_iterator): num_batches_seen = i - X = X.to(device = device, dtype = torch.float) - d = d.to(device = device, dtype = torch.float) - r = r.to(device = device, dtype = torch.float) - y = y.to(device = device, dtype = torch.float) - rev_d = rev_d.to(device = device, dtype = torch.float) - rev_r = rev_r.to(device = device, dtype = torch.float) - rev_y = rev_y.to(device = device, dtype = torch.float) - - X, d, r, y = rms_and_mixup(X, d, r, y, True, args) - _, rev_d, rev_r, rev_y = rms_and_mixup(X, rev_d, rev_r, rev_y, True, args) - probs, regression, class_logits, rev_probs, rev_regression, rev_class_logits = model(X) - + batch = [item.to(device, dtype=torch.float) for item in batch] + X, d, r, y = batch[:4] + #X = X.to(device = device, dtype = torch.float) + #d = d.to(device = device, dtype = torch.float) + #r = r.to(device = device, dtype = torch.float) + #y = y.to(device = device, dtype = torch.float) # We mask out loss from each end of the clip, so the model isn't forced to learn to detect events that are partially cut off. # This does not affect inference, because during inference we overlap clips at 50% - + X, d, r, y = rms_and_mixup(X, d, r, y, True, args) + probs, regression, class_logits, rev_probs, rev_regression, rev_class_logits = model(X) + #model_outputs = model(X) + #probs, regression, class_logits = model_outputs[:3] detection_loss, reg_loss, class_loss = lf(d, probs, r, regression, y, class_logits, args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) - rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_y, rev_class_logits, args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) - normal_loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss - rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss - loss = (normal_loss + rev_loss)/2 + loss = args.rho * class_loss + detection_loss + args.lamb * reg_loss train_loss += loss.item() - rev_train_loss += rev_loss.item() - normal_train_loss += normal_loss.item() - normal_losses.append(normal_loss.item()) - rev_losses.append(rev_loss.item()) losses.append(loss.item()) detection_losses.append(detection_loss.item()) regression_losses.append(args.lamb * reg_loss.item()) class_losses.append(args.rho * class_loss.item()) - rev_detection_losses.append(rev_detection_loss.item()) - rev_regression_losses.append(args.lamb * rev_reg_loss.item()) - rev_class_losses.append(args.rho * rev_class_loss.item()) + + pbar_str = f"loss {np.mean(losses[-10:]):.5f}, det {np.mean(detection_losses[-10:]):.5f}, reg {np.mean(regression_losses[-10:]):.5f}, class {np.mean(class_losses[-10:]):.5f}" + + if model.is_bidirectional: + assert all(x is not None for x in [rev_probs, rev_regression, rev_class_logits]) + rev_d, rev_r, rev_y = batch[4:] + #rev_probs, rev_regression, rev_class_logits = model_outputs[3:] + _, rev_d, rev_r, rev_y = rms_and_mixup(X, rev_d, rev_r, rev_y, True, args) + + + rev_detection_loss, rev_reg_loss, rev_class_loss = lf(rev_d, rev_probs, rev_r, rev_regression, rev_y, rev_class_logits, 
args=args, reg_loss_fn=reg_loss_fn, class_loss_fn=class_loss_fn) + rev_loss = args.rho * rev_class_loss + rev_detection_loss + args.lamb * rev_reg_loss + rev_train_loss += rev_loss.item() + rev_losses.append(rev_loss.item()) + rev_detection_losses.append(rev_detection_loss.item()) + rev_regression_losses.append(args.lamb * rev_reg_loss.item()) + rev_class_losses.append(args.rho * rev_class_loss.item()) + loss = (loss + rev_loss)/2 + + pbar_str += f" revloss {np.mean(rev_losses[-10:]):.5f}, revdet {np.mean(rev_detection_losses[-10:]):.5f}, revreg {np.mean(rev_regression_losses[-10:]):.5f}, revclass {np.mean(rev_class_losses[-10:]):.5f}" + else: + assert all(x is None for x in [rev_probs, rev_regression, rev_class_logits]) + optimizer.zero_grad() loss.backward() optimizer.step() if i > 10: - data_iterator.set_description(f"loss {np.mean(losses[-10:]):.5f}, det {np.mean(detection_losses[-10:]):.5f}, reg {np.mean(regression_losses[-10:]):.5f}, class {np.mean(class_losses[-10:]):.5f} revloss {np.mean(rev_losses[-10:]):.5f}, revdet {np.mean(rev_detection_losses[-10:]):.5f}, revreg {np.mean(rev_regression_losses[-10:]):.5f}, revclass {np.mean(rev_class_losses[-10:]):.5f}") + data_iterator.set_description(pbar_str) if args.is_test and i == 15: break diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py index 8e14871..7db1315 100644 --- a/voxaboxen/training/train_model.py +++ b/voxaboxen/training/train_model.py @@ -34,20 +34,14 @@ def train_model(args): os.makedirs(args.experiment_output_dir) save_params(args) - if hasattr(args,'stereo') and args.stereo: - model = DetectionModelStereo(args) - else: - model = DetectionModel(args) + model = DetectionModel(args) if args.reload_from is not None: checkpoint = torch.load(os.path.join(args.project_dir, args.reload_from, 'model.pt')) model.load_state_dict(checkpoint['model_state_dict']) ## Training - if args.n_epochs == 0: - trained_model = model - else: - trained_model = train(model, args) + trained_model = train(model, args) ## Evaluation test_dataloader = get_test_dataloader(args) @@ -55,21 +49,22 @@ def train_model(args): val_manifest = predict_and_generate_manifest(trained_model, val_dataloader, args) - best_comb_discard_thresh = -1 - best_f1 = 0 - for comb_discard_thresh in [.3,.35,.4,.45,.5,.55,.6,.65,.75,.8,.85,.9]: - val_metrics, val_conf_mats = evaluate_based_on_manifest(val_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=0.5, class_threshold=0.5, comb_discard_threshold=comb_discard_thresh) - new_f1 = val_metrics['comb']['macro']['f1'] - if new_f1 > best_f1: - best_comb_discard_thresh = comb_discard_thresh - best_f1 = new_f1 - print(f'IOU: 0.5 class_thresh: 0.5 Comb discard threshold: {comb_discard_thresh}') - print_metrics(val_metrics, just_one_label=(len(args.label_set)==1)) + model.comb_discard_thresh = -1 + if model.is_bidirectional: + best_f1 = 0 + for comb_discard_thresh in [.3,.35,.4,.45,.5,.55,.6,.65,.75,.8,.85,.9]: + val_metrics, val_conf_mats = evaluate_based_on_manifest(val_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=0.5, class_threshold=0.5, comb_discard_threshold=comb_discard_thresh) + new_f1 = val_metrics['comb']['macro']['f1'] + if new_f1 > best_f1: + model.comb_discard_thresh = comb_discard_thresh + best_f1 = new_f1 + print(f'IOU: 0.5 class_thresh: 0.5 Comb discard threshold: {comb_discard_thresh}') + print_metrics(val_metrics, just_one_label=(len(args.label_set)==1)) + print(f'Using comb_discard_thresh: 
{model.comb_discard_thresh}') test_manifest = predict_and_generate_manifest(trained_model, test_dataloader, args) - print(f'Using thresh: {best_comb_discard_thresh}') for iou in [0.2, 0.5, 0.8]: - test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=best_comb_discard_thresh) + test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=model.comb_discard_thresh) print(f'Test with IOU{iou}') print_metrics(test_metrics, just_one_label=(len(args.label_set)==1)) From cfcfe5055950986f0824fc692b69509ba38de059 Mon Sep 17 00:00:00 2001 From: Lou1sM Date: Sat, 18 May 2024 11:49:37 +0100 Subject: [PATCH 11/11] handle loading of maybe-bidirectional model at inference --- voxaboxen/evaluation/evaluation.py | 10 +++--- voxaboxen/inference/inference.py | 53 +++++++++++++++++------------- voxaboxen/inference/params.py | 7 ++-- voxaboxen/model/model.py | 1 + voxaboxen/training/train_model.py | 21 ++++++------ 5 files changed, 52 insertions(+), 40 deletions(-) diff --git a/voxaboxen/evaluation/evaluation.py b/voxaboxen/evaluation/evaluation.py index b6d5362..8ddbb9f 100644 --- a/voxaboxen/evaluation/evaluation.py +++ b/voxaboxen/evaluation/evaluation.py @@ -227,7 +227,7 @@ def generate_features(model, single_clip_dataloader, args, verbose = True): return all_features.detach().cpu().numpy() -def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, classif_threshold=0): +def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=True, target_dir=None, detection_threshold=0, classif_threshold=0): if target_dir is None: target_dir = args.experiment_output_dir @@ -248,7 +248,7 @@ def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=Tr # np.save(target_fp, classifs) ## peaks - det_peaks, properties = find_peaks(dets, height=args.detection_threshold, distance=args.peak_distance) + det_peaks, properties = find_peaks(dets, height=detection_threshold, distance=args.peak_distance) det_probs = properties['peak_heights'] ## regs and classifs @@ -278,7 +278,7 @@ def export_to_selection_table(dets, regs, classifs, fn, args, is_bck, verbose=Tr bboxes, det_probs, class_idxs, class_probs = pred2bbox(det_peaks, det_probs, durations, class_idxs, class_probs, pred_sr, is_bck) if args.nms == "soft_nms": - bboxes, det_probs, class_idxs, class_probs = soft_nms(bboxes, det_probs, class_idxs, class_probs, sigma=args.soft_nms_sigma, thresh=args.detection_threshold) + bboxes, det_probs, class_idxs, class_probs = soft_nms(bboxes, det_probs, class_idxs, class_probs, sigma=args.soft_nms_sigma, thresh=detection_threshold) elif args.nms == "nms": bboxes, det_probs, class_idxs, class_probs = nms(bboxes, det_probs, class_idxs, class_probs, iou_thresh=args.nms_thresh) @@ -423,10 +423,10 @@ def predict_and_generate_manifest(model, dataloader_dict, args, verbose = True): for fn in dataloader_dict: fwd_detections, fwd_regressions, fwd_classifications, bck_detections, bck_regressions, bck_classifications = generate_predictions(model, dataloader_dict[fn], args, verbose=verbose) - fwd_predictions_fp = export_to_selection_table(fwd_detections, fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose) + fwd_predictions_fp = export_to_selection_table(fwd_detections, 
fwd_regressions, fwd_classifications, fn, args, is_bck=False, verbose=verbose, detection_threshold=args.detection_threshold)
         if model.is_bidirectional:
             assert all(x is not None for x in [bck_detections, bck_classifications, bck_regressions])
-            bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose)
+            bck_predictions_fp = export_to_selection_table(bck_detections, bck_regressions, bck_classifications, fn, args, is_bck=True, verbose=verbose, detection_threshold=args.detection_threshold)
         else:
             assert all(x is None for x in [bck_detections, bck_classifications, bck_regressions])
             bck_predictions_fp = None
diff --git a/voxaboxen/inference/inference.py b/voxaboxen/inference/inference.py
index f223587..adfe9ce 100644
--- a/voxaboxen/inference/inference.py
+++ b/voxaboxen/inference/inference.py
@@ -6,54 +6,56 @@
 from voxaboxen.inference.params import parse_inference_args
 from voxaboxen.training.params import load_params
 from voxaboxen.model.model import DetectionModel, DetectionModelStereo
-from voxaboxen.evaluation.evaluation import generate_predictions, export_to_selection_table
+from voxaboxen.evaluation.evaluation import generate_predictions, export_to_selection_table, combine_fwd_bck_preds
 from voxaboxen.data.data import get_single_clip_data
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 def inference(inference_args):
     inference_args = parse_inference_args(inference_args)
-    args = load_params(inference_args.model_args_fp) 
+    args = load_params(inference_args.model_args_fp)
     files_to_infer = pd.read_csv(inference_args.file_info_for_inference)
-    
+
     output_dir = os.path.join(args.experiment_dir, 'inference')
     if not os.path.exists(output_dir):
-        os.makedirs(output_dir)
-        
-    # model
-    if hasattr(args,'stereo') and args.stereo:
-        model = DetectionModelStereo(args)
-    else:
-        model = DetectionModel(args)
-    model_checkpoint_fp = os.path.join(args.experiment_dir, "model.pt")
+        os.makedirs(output_dir)
+
+    # model
+    model = DetectionModel(args)
+    model_checkpoint_fp = os.path.join(args.experiment_dir, "final-model.pt")
     print(f"Loading model weights from {model_checkpoint_fp}")
     cp = torch.load(model_checkpoint_fp)
-    model.load_state_dict(cp["model_state_dict"])
+    model.load_state_dict(cp)
     model = model.to(device)
-    
+
     for i, row in files_to_infer.iterrows():
         audio_fp = row['audio_fp']
         fn = row['fn']
-        
+
         if not os.path.exists(audio_fp):
             print(f"Could not locate file {audio_fp}")
             continue
-        
+
         try:
             dataloader = get_single_clip_data(audio_fp, args.clip_duration/2, args)
         except:
             print(f"Could not load file {audio_fp}")
             continue
-        
+
         if len(dataloader) == 0:
             print(f"Skipping {fn} because it is too short")
             continue
-        
-        detections, regressions, classifications = generate_predictions(model, dataloader, args, verbose = True)
-        
-        target_fp = export_to_selection_table(detections, regressions, classifications, fn, args, verbose=True, target_dir=output_dir, detection_threshold = inference_args.detection_threshold, classification_threshold = inference_args.classification_threshold)
-        
-        print(f"Saving predictions for {fn} to {target_fp}")
+
+        if inference_args.disable_bidirectional and not model.is_bidirectional:
+            print('Warning: you passed --disable-bidirectional but the model is not bidirectional')
+        detections, regressions, classifs, rev_detections, rev_regressions, rev_classifs = generate_predictions(model, dataloader, args, verbose = True)
+        fwd_target_fp = export_to_selection_table(detections, regressions, classifs, fn, args, is_bck=False, verbose=True, target_dir=output_dir, detection_threshold=inference_args.detection_threshold, classif_threshold=inference_args.classification_threshold)
+        if model.is_bidirectional and not inference_args.disable_bidirectional:
+            rev_target_fp = export_to_selection_table(rev_detections, rev_regressions, rev_classifs, fn, args, is_bck=True, verbose=True, target_dir=output_dir, detection_threshold=inference_args.detection_threshold, classif_threshold=inference_args.classification_threshold)
+            comb_target_fp, match_target_fp = combine_fwd_bck_preds(output_dir, fn, comb_iou_threshold=args.comb_iou_threshold, comb_discard_threshold=model.comb_discard_thresh.item())
+            print(f"Saving predictions for {fn} to {comb_target_fp}")
+        else:
+            print(f"Saving predictions for {fn} to {fwd_target_fp}")
 
 if __name__ == "__main__":
     main(sys.argv[1:])
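Editor's note on the inference changes above: combine_fwd_bck_preds is where the forward (onset-anchored) and backward (offset-anchored) selection tables meet, but the patch does not show its internals. The sketch below is only a toy illustration of one plausible scheme, under the assumption that it matches the two box sets by 1-D IoU and applies the discard threshold to unmatched boxes; iou_1d and merge_events are made-up names, not voxaboxen functions.

# Toy merge of forward and backward event sets; NOT the voxaboxen implementation.
def iou_1d(a, b):
    # a, b: (start, end) intervals in seconds
    inter = max(0.0, min(a[1], b[1]) - max(a[0], b[0]))
    union = (a[1] - a[0]) + (b[1] - b[0]) - inter
    return inter / union if union > 0 else 0.0

def merge_events(fwd, bck, iou_thresh=0.5, discard_thresh=0.45):
    # fwd, bck: lists of ((start, end), prob)
    merged, matched_bck = [], set()
    for box_f, p_f in fwd:
        best_j = max(range(len(bck)), key=lambda j: iou_1d(box_f, bck[j][0]), default=None)
        if best_j is not None and iou_1d(box_f, bck[best_j][0]) >= iou_thresh:
            # matched pair: take the onset from the forward box, the offset
            # from the backward box, and the more confident probability
            matched_bck.add(best_j)
            box_b, p_b = bck[best_j]
            merged.append(((box_f[0], box_b[1]), max(p_f, p_b)))
        elif p_f >= discard_thresh:
            merged.append((box_f, p_f))  # unmatched boxes survive only above the threshold
    merged += [bck[j] for j in range(len(bck)) if j not in matched_bck and bck[j][1] >= discard_thresh]
    return merged

print(merge_events([((0.0, 1.0), 0.9)], [((0.1, 1.1), 0.8)]))
# -> [((0.0, 1.1), 0.9)]

Whatever the exact matching rule, this is the intuition behind tuning comb_discard_thresh on validation: it controls how confident an unpaired one-directional detection must be to survive the merge.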
diff --git a/voxaboxen/inference/params.py b/voxaboxen/inference/params.py
index e6c5274..b54b3c2 100644
--- a/voxaboxen/inference/params.py
+++ b/voxaboxen/inference/params.py
@@ -5,11 +5,12 @@
 def parse_inference_args(inference_args):
     parser = argparse.ArgumentParser()
-    
+
     parser.add_argument('--model-args-fp', type=str, required=True, help = "filepath of model params saved as a yaml")
     parser.add_argument('--file-info-for-inference', type=str, required=True, help = "filepath of info csv listing filenames and filepaths of audio for inference")
     parser.add_argument('--detection-threshold', type=float, default=0.5, help="detection peaks need to be at or above this threshold to make it into the exported selection table")
     parser.add_argument('--classification-threshold', type=float, default=0.0, help="classification probability needs to be at or above this threshold to not be labeled as Unknown")
-    
-    inference_args = parser.parse_args(inference_args)
+    parser.add_argument('--disable-bidirectional', action='store_true', help="use only the forward detection head at inference, even if the model was trained bidirectionally")
+
+    inference_args = parser.parse_args(inference_args)
     return inference_args
diff --git a/voxaboxen/model/model.py b/voxaboxen/model/model.py
index 5f8a6a8..6d472de 100644
--- a/voxaboxen/model/model.py
+++ b/voxaboxen/model/model.py
@@ -53,6 +53,7 @@ def __init__(self, args, embedding_dim=768):
         self.args = args
         aves_sr = args.sr // args.scale_factor
         self.detection_head = DetectionHead(args, embedding_dim = embedding_dim)
+        self.comb_discard_thresh = nn.Parameter(torch.tensor(0.))
         if self.is_bidirectional:
             self.rev_detection_head = DetectionHead(args, embedding_dim = embedding_dim)
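Editor's note on the model.py hunk above: anything assigned to an nn.Module attribute as an nn.Parameter is registered with the module and therefore included in state_dict(), which is exactly what lets the threshold tuned at train time ride along inside final-model.pt and come back out at inference. A minimal, self-contained sketch of that round trip (TinyDetector is a hypothetical stand-in, not voxaboxen code):

import torch
import torch.nn as nn

class TinyDetector(nn.Module):
    def __init__(self):
        super().__init__()
        self.head = nn.Linear(8, 1)  # stand-in for the detection head
        # Registered as a Parameter, so it is serialized with the weights
        self.comb_discard_thresh = nn.Parameter(torch.tensor(0.))

model = TinyDetector()
# Assigning a new Parameter re-registers it under the same name,
# mirroring what train_model.py does after the validation sweep
model.comb_discard_thresh = nn.Parameter(torch.tensor(0.45))
print(sorted(model.state_dict().keys()))
# -> ['comb_discard_thresh', 'head.bias', 'head.weight']

restored = TinyDetector()
restored.load_state_dict(model.state_dict())
assert abs(restored.comb_discard_thresh.item() - 0.45) < 1e-6

Since the threshold is never updated by the loss, register_buffer('comb_discard_thresh', torch.tensor(0.)) would arguably be the more conventional choice: it serializes identically but keeps the value out of model.parameters() and away from the optimizer.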
diff --git a/voxaboxen/training/train_model.py b/voxaboxen/training/train_model.py
index 7db1315..2f68b92 100644
--- a/voxaboxen/training/train_model.py
+++ b/voxaboxen/training/train_model.py
@@ -1,12 +1,12 @@
+import torch.nn as nn
 import pandas as pd
-from voxaboxen.data.data import get_test_dataloader, get_val_dataloader
 import torch
-from voxaboxen.model.model import DetectionModel, DetectionModelStereo
+from voxaboxen.data.data import get_test_dataloader, get_val_dataloader
+from voxaboxen.model.model import DetectionModel
 from voxaboxen.training.train import train
 from voxaboxen.training.params import parse_args, set_seed, save_params
-from voxaboxen.evaluation.evaluation import generate_predictions, export_to_selection_table, get_metrics, summarize_metrics, predict_and_generate_manifest, evaluate_based_on_manifest
+from voxaboxen.evaluation.evaluation import predict_and_generate_manifest, evaluate_based_on_manifest
 
-import yaml
 import sys
 import os
 
@@ -36,9 +36,6 @@ def train_model(args):
     save_params(args)
 
     model = DetectionModel(args)
-    if args.reload_from is not None:
-        checkpoint = torch.load(os.path.join(args.project_dir, args.reload_from, 'model.pt'))
-        model.load_state_dict(checkpoint['model_state_dict'])
 
     ## Training
     trained_model = train(model, args)
@@ -49,14 +46,13 @@ def train_model(args):
 
     val_manifest = predict_and_generate_manifest(trained_model, val_dataloader, args)
 
-    model.comb_discard_thresh = -1
     if model.is_bidirectional:
         best_f1 = 0
         for comb_discard_thresh in [.3,.35,.4,.45,.5,.55,.6,.65,.75,.8,.85,.9]:
             val_metrics, val_conf_mats = evaluate_based_on_manifest(val_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=0.5, class_threshold=0.5, comb_discard_threshold=comb_discard_thresh)
             new_f1 = val_metrics['comb']['macro']['f1']
             if new_f1 > best_f1:
-                model.comb_discard_thresh = comb_discard_thresh
+                model.comb_discard_thresh = nn.Parameter(torch.tensor(comb_discard_thresh))
                 best_f1 = new_f1
             print(f'IOU: 0.5 class_thresh: 0.5 Comb discard threshold: {comb_discard_thresh}')
             print_metrics(val_metrics, just_one_label=(len(args.label_set)==1))
@@ -64,10 +60,12 @@
 
     test_manifest = predict_and_generate_manifest(trained_model, test_dataloader, args)
     for iou in [0.2, 0.5, 0.8]:
-        test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=model.comb_discard_thresh)
+        test_metrics, test_conf_mats = evaluate_based_on_manifest(test_manifest, args, output_dir = os.path.join(args.experiment_dir, 'test_results') , iou=iou, class_threshold=0.5, comb_discard_threshold=model.comb_discard_thresh.item())
         print(f'Test with IOU{iou}')
         print_metrics(test_metrics, just_one_label=(len(args.label_set)==1))
 
+    torch.save(model.state_dict(), os.path.join(args.experiment_dir, 'final-model.pt'))
+
 if __name__ == "__main__":
     train_model(sys.argv[1:])
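A final note on the train_model.py hunk above: the combined-prediction discard threshold is chosen by a plain grid search on validation macro F1 before being frozen into the checkpoint. A compressed sketch of that selection logic, where eval_f1 is a hypothetical stand-in for the score evaluate_based_on_manifest returns (the real call evaluates a full prediction manifest, not a closed-form function):

import torch
import torch.nn as nn

def eval_f1(thresh: float) -> float:
    # toy objective that peaks at 0.6; stands in for validation macro F1
    return 1.0 - (thresh - 0.6) ** 2

# same candidate grid as the patch
candidates = [.3, .35, .4, .45, .5, .55, .6, .65, .75, .8, .85, .9]
best_f1 = 0.0
comb_discard_thresh = nn.Parameter(torch.tensor(0.))
for thresh in candidates:
    f1 = eval_f1(thresh)
    if f1 > best_f1:
        # wrap the winner as a Parameter so it lands in the saved state_dict
        comb_discard_thresh = nn.Parameter(torch.tensor(thresh))
        best_f1 = f1

# .item() unwraps the 0-d tensor back to a Python float for downstream use,
# matching the comb_discard_thresh.item() calls in the patch
print(round(comb_discard_thresh.item(), 2), round(best_f1, 3))  # -> 0.6 1.0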