From 583dfb79bf5671f315abf00a3ea56a50b569cf20 Mon Sep 17 00:00:00 2001
From: David Humphrey <david.andrew.humphrey@gmail.com>
Date: Mon, 30 Oct 2023 18:12:09 -0400
Subject: [PATCH] Add -i option to include bounding box images in clips
 directory

---
 README.md                   |  3 ++-
 action.py                   |  7 +++++++
 scripts/terrestrial-demo.sh |  2 +-
 src/action.py               | 37 ++++++++++++++++++++++++++++++++-----
 src/base_detector.py        | 10 +++-------
 src/clip_manager.py         | 36 +++++++++++++++++++++++++++++++++---
 6 files changed, 78 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 96b653a..7e72526 100644
--- a/README.md
+++ b/README.md
@@ -53,7 +53,7 @@ $ python3 action.py
 
 usage: action.py [-h] [-e {terrestrial,aquatic}] [-b BUFFER] [-c CONFIDENCE]
                  [-m MIN_DURATION] [-f SKIP_FRAMES] [-d] [-o OUTPUT_DIR] [-s]
-                 [--log-level {DEBUG,INFO,WARNING,ERROR}]
+                 [-i] [--log-level {DEBUG,INFO,WARNING,ERROR}]
                  filename [filename ...]
 action.py: error: the following arguments are required: filename
 ```
@@ -76,6 +76,7 @@ Action can be configured to run in different ways using various arguments and fl
 | `-d`, `--delete-previous-clips` | Whether to delete clips from previous interrupted or old runs before processing a video again. | `--delete-previous-clips` |
 | `-o`, `--output-dir` | Output directory to use for all clips. | `--output-dir ./output` |
 | `-s`, `--show-detections` | Whether to visually show detection frames with bounding boxes. | `--show-detections` |
+| `-i`, `--include-bbox-images` | Whether to include the bounding box images for the frames that trigger or extend each detection event, along with the videos in the clips directory. | `--include-bbox-images` |
 | `--log-level` | Logging level. Can be `DEBUG`, `INFO`, `WARNING`, or `ERROR`. Defaults to `INFO`. | `--log-level DEBUG` |
 
 > [!NOTE]
diff --git a/action.py b/action.py
index 773189e..293125a 100755
--- a/action.py
+++ b/action.py
@@ -71,6 +71,13 @@
         dest="show_detections",
         help="Whether to show detection frames with bounding boxes",
     )
+    parser.add_argument(
+        "-i",
+        "--include-bbox-images",
+        action="store_true",
+        dest="include_bbox_images",
+        help="Whether to include the bounding box images for the frames that trigger or extend each detection event, along with the videos in the clips directory.",
+    )
     parser.add_argument(
         "--log-level",
         choices=["DEBUG", "INFO", "WARNING", "ERROR"],
diff --git a/scripts/terrestrial-demo.sh b/scripts/terrestrial-demo.sh
index d70d713..e9ffb23 100755
--- a/scripts/terrestrial-demo.sh
+++ b/scripts/terrestrial-demo.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-python3 action.py ./video/terrestrial-demo.mov -c 0.45 -m 3.0 -s -b 1.0 -d -e terrestrial
+python3 action.py ./video/terrestrial-demo.mov -c 0.45 -m 3.0 -s -i -b 1.0 -d -e terrestrial
diff --git a/src/action.py b/src/action.py
index b139ff6..d7b5a15 100755
--- a/src/action.py
+++ b/src/action.py
@@ -95,6 +95,7 @@ def process_frames(
     buffer_seconds = detector.buffer
     min_detection_duration = detector.min_duration
     show_detections = args.show_detections
+    include_bbox_images = args.include_bbox_images
 
     # Number of frames per minute of video time
     frames_per_minute = 60 * fps
@@ -157,8 +158,22 @@ def process_frames(
                         logger.info(
                             f"{detector.class_name} detected, extending detection event: {format_time(frame_count / fps + buffer_seconds)} (max confidence={format_percent(detection_highest_confidence)})"
                         )
-                        if show_detections:
-                            detector.draw_detections(frame, boxes, video_path)
+                        if show_detections or include_bbox_images:
+                            # Generate an image with bounding boxes drawn on top
+                            bbox_img = detector.draw_detections(frame, boxes)
+
+                            if show_detections:
+                                # Show the bbox image in a window
+                                cv2.imshow(video_path, bbox_img)
+                                cv2.waitKey(1)
+
+                            if include_bbox_images:
+                                # Write the bbox image to the clips directory
+                                frame_time = frame_count / fps
+                                clips.create_bbox_image(
+                                    frame_time, bbox_img, video_path
+                                )
+
                         break
             else:
                 # If no detection was made within the buffer period, and we didn't
@@ -198,8 +213,20 @@ def process_frames(
                     logger.info(
                         f"{detector.class_name} detected, starting detection event: {format_time(frame_count / fps)} (max confidence={format_percent(detection_highest_confidence)})"
                     )
-                    if show_detections:
-                        detector.draw_detections(frame, boxes, video_path)
+                    if show_detections or include_bbox_images:
+                        # Generate an image with bounding boxes drawn on top
+                        bbox_img = detector.draw_detections(frame, boxes)
+
+                        if show_detections:
+                            # Show the bbox image in a window
+                            cv2.imshow(video_path, bbox_img)
+                            cv2.waitKey(1)
+
+                        if include_bbox_images:
+                            # Write the bbox image to the clips directory
+                            clips.create_bbox_image(
+                                detection_start_time, bbox_img, video_path
+                            )
                     detection_event = True
 
         # We've finished processing this frame
@@ -308,7 +335,7 @@ def main(args):
 
             # If we're not using a common clips dir, reset the counter for future clips
             if not output_dir:
-                clips.reset_clip_count()
+                clips.reset()
 
             clip_count_before = clips.get_clip_count()
 
diff --git a/src/base_detector.py b/src/base_detector.py
index 8ad8f69..a2f21f0 100644
--- a/src/base_detector.py
+++ b/src/base_detector.py
@@ -110,15 +110,13 @@ def detect(self, image_src):
         # Return boxes[0] if it exists, otherwise return an empty list
         return boxes[0] if boxes else []
 
-    def draw_detections(self, img, boxes, title):
+    def draw_detections(self, img, boxes):
         """
-        Draw bounding boxes on the image for detected objects and show
-        in a window.
+        Draw bounding boxes for detected objects on a copy of the image and return the copy.
 
         Args:
             img: Image on which to draw bounding boxes.
             boxes: List of bounding boxes.
-            title: Title for the image window.
         """
         img = np.copy(img)
         width = img.shape[1]
@@ -162,9 +160,7 @@ def draw_detections(self, img, boxes, title):
             )
 
             img = cv2.rectangle(img, (x1, y1), (x2, y2), bgr, bbox_thick)
-
-        cv2.imshow(title, img)
-        cv2.waitKey(1)
+        return img
 
     def post_processing(self, outputs):
         """
diff --git a/src/clip_manager.py b/src/clip_manager.py
index 897d32a..846e1cb 100644
--- a/src/clip_manager.py
+++ b/src/clip_manager.py
@@ -14,6 +14,8 @@
 
 from .utils import format_time
 
+import cv2
+
 
 def get_clips_dir(video_path):
     """
@@ -92,6 +94,7 @@ class ClipManager:
         stop_event (Event): The stop event for the clip process.
         clip_process (Process): The clip process.
         clip_count (int): The current clip count.
+        bbox_count (int): The current bbox image count.
     """
 
     def __init__(self, logger, output_dir):
@@ -112,6 +115,7 @@ def __init__(self, logger, output_dir):
         )
         self.clip_process.start()
         self.clip_count = 0
+        self.bbox_count = 0
 
     def create_clip_process(self, queue, stop_event):
         """
@@ -190,6 +194,31 @@ def create_clip(self, clip_start_time, clip_end_time, video_path):
             (clip_start_time, clip_end_time, self.clip_count, video_path)
         )
 
+    def create_bbox_image(self, clip_time, bbox_img, video_path):
+        """
+        Write a bounding box image to the clips directory
+
+        Args:
+            clip_time (float): The time in the video of the frame the image was generated from.
+            bbox_img: The frame image with bounding boxes drawn on it.
+            video_path (str): The path to the video file.
+
+        Returns:
+            None
+        """
+
+        self.bbox_count += 1
+
+        # Create a bbox image for the given detection
+        base_dir = self.output_dir if self.output_dir else get_clips_dir(video_path)
+        bbox_filename = (
+            f"{base_dir}/{(self.bbox_count):04}-{format_time(clip_time, '_')}.jpg"
+        )
+        create_output_dir(os.path.dirname(bbox_filename))
+
+        # Write the bbox image to the clips directory as a JPG
+        cv2.imwrite(bbox_filename, bbox_img)
+
     def stop(self):
         """
         Let the queue know it's time to stop processing new clip
@@ -213,15 +242,16 @@ def cleanup(self):
             self.clip_queue.put((None, None, None, None))
             self.clip_process.join()
 
-    def reset_clip_count(self):
+    def reset(self):
         """
-        Reset the clip count to 0.
+        Reset the clip and bbox counts to 0.
 
         Returns:
             None
         """
-        self.logger.debug("Resetting clip manager clip_count to 0")
+        self.logger.debug("Resetting clip manager counts to 0")
         self.clip_count = 0
+        self.bbox_count = 0
 
     def get_clip_count(self):
         """