added code that will take in yolov8 and should output 3d bounding boxes (workflow_test)
TiaSinghania committed Jan 14, 2025
1 parent 2d0d09e commit 6c9da8e
Showing 6 changed files with 4,044 additions and 0 deletions.
40 changes: 40 additions & 0 deletions workflow-test/README.md
@@ -0,0 +1,40 @@
THIS IS STOLEN ENTIRELY FROM HERE: https://github.com/stereolabs/zed-sdk/tree/master/object%20detection/custom%20detector/python/pytorch_yolov8

# ZED SDK - Object Detection

This sample shows how to detect custom objects with the official PyTorch implementation of YOLOv8 on images from a ZED camera, and how to ingest those detections into the ZED SDK to extract 3D information and tracking for each object.
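
The hand-off between the two libraries is the interesting part: YOLOv8 supplies 2D boxes, and the ZED SDK lifts them into 3D and tracks them. Below is a minimal sketch of that loop (not the sample's exact code; it assumes `zed` is an already-opened `sl.Camera` with the object detection module enabled in custom-box mode, and field handling may differ in detail):

```python
import numpy as np
import pyzed.sl as sl
from ultralytics import YOLO

def detections_to_custom_boxes(results):
    # Convert YOLOv8 xyxy boxes into the 4-corner (clockwise from top-left)
    # format the ZED SDK ingests
    custom_boxes = []
    boxes = results[0].boxes
    for xyxy, conf, cls in zip(boxes.xyxy.cpu().numpy(),
                               boxes.conf.cpu().numpy(),
                               boxes.cls.cpu().numpy()):
        x_min, y_min, x_max, y_max = xyxy
        obj = sl.CustomBoxObjectData()
        obj.bounding_box_2d = np.array([[x_min, y_min], [x_max, y_min],
                                        [x_max, y_max], [x_min, y_max]])
        obj.label = int(cls)
        obj.probability = float(conf)
        obj.is_grounded = False  # set True for objects resting on the floor plane
        custom_boxes.append(obj)
    return custom_boxes

model = YOLO("yolov8m.pt")
image, objects = sl.Mat(), sl.Objects()
while zed.grab() == sl.ERROR_CODE.SUCCESS:  # 'zed' opened and configured elsewhere
    zed.retrieve_image(image, sl.VIEW.LEFT)
    results = model.predict(image.get_data()[:, :, :3])  # drop the alpha channel
    zed.ingest_custom_box_objects(detections_to_custom_boxes(results))
    zed.retrieve_objects(objects, sl.ObjectDetectionRuntimeParameters())
    # objects.object_list now carries 3D boxes, positions, and track IDs
```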

## Getting Started

- Get the latest [ZED SDK](https://www.stereolabs.com/developers/release/) and [pyZED Package](https://www.stereolabs.com/docs/app-development/python/install/)
- Check the [Documentation](https://www.stereolabs.com/docs/object-detection/custom-od/)

## Setting up

- Install YOLOv8 (the `ultralytics` package) using pip

```sh
pip install ultralytics
```

## Run the program

*NOTE: The ZED v1 is not compatible with this module*

```sh
python detector.py --weights yolov8m.pt # [--img_size 512 --conf_thres 0.1 --svo path/to/file.svo]
```
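
For example, to run on a recorded SVO file with a lower confidence threshold:

```sh
python detector.py --weights yolov8m.pt --conf_thres 0.1 --svo path/to/file.svo
```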

### Features

- The camera point cloud is displayed in a 3D OpenGL view
- 3D bounding boxes around detected objects are drawn
- Object classes and confidence thresholds can be adjusted

## Training your own model

This sample can use any model trained with YOLOv8, including custom-trained ones. To get started training a model on a custom dataset with YOLOv8, see https://docs.ultralytics.com/tutorials/train-custom-datasets/
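
As a rough sketch, fine-tuning with the `ultralytics` Python API looks like this (the dataset YAML name is illustrative):

```python
from ultralytics import YOLO

# Start from a pretrained checkpoint and fine-tune on a custom dataset
model = YOLO("yolov8m.pt")
model.train(data="my_dataset.yaml", epochs=100, imgsz=640)

# The resulting weights can then be passed to this sample:
#   python detector.py --weights runs/detect/train/weights/best.pt
```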

## Support

If you need assistance, visit our Community site at https://community.stereolabs.com/
248 changes: 248 additions & 0 deletions workflow-test/cv_viewer/tracking_viewer.py
@@ -0,0 +1,248 @@
import cv2
import numpy as np

from cv_viewer.utils import *
import pyzed.sl as sl
import math
from collections import deque


# ----------------------------------------------------------------------
# 2D LEFT VIEW
# ----------------------------------------------------------------------


def cvt(pt, scale):
    """
    Scale a 2D point by per-axis scale factors.
    """
    out = [pt[0] * scale[0], pt[1] * scale[1]]
    return out


def get_image_position(bounding_box_image, img_scale):
    """
    Return the scaled image-space center of a 2D bounding box
    (corners ordered top-left, top-right, bottom-right, bottom-left).
    """
    out_position = np.zeros(2)
    out_position[0] = (bounding_box_image[0][0] + bounding_box_image[2][0]) * 0.5 * img_scale[0]
    out_position[1] = (bounding_box_image[0][1] + bounding_box_image[2][1]) * 0.5 * img_scale[1]
    return out_position


def render_2D(left_display, img_scale, objects, is_tracking_on):
overlay = left_display.copy()

line_thickness = 2
for obj in objects.object_list:
if render_object(obj, is_tracking_on):
base_color = generate_color_id_u(obj.id)
# Display image scaled 2D bounding box
top_left_corner = cvt(obj.bounding_box_2d[0], img_scale)
top_right_corner = cvt(obj.bounding_box_2d[1], img_scale)
bottom_right_corner = cvt(obj.bounding_box_2d[2], img_scale)
bottom_left_corner = cvt(obj.bounding_box_2d[3], img_scale)

# Creation of the 2 horizontal lines
cv2.line(left_display, (int(top_left_corner[0]), int(top_left_corner[1])),
(int(top_right_corner[0]), int(top_right_corner[1])), base_color, line_thickness)
cv2.line(left_display, (int(bottom_left_corner[0]), int(bottom_left_corner[1])),
(int(bottom_right_corner[0]), int(bottom_right_corner[1])), base_color, line_thickness)
# Creation of 2 vertical lines
draw_vertical_line(left_display, bottom_left_corner, top_left_corner, base_color, line_thickness)
draw_vertical_line(left_display, bottom_right_corner, top_right_corner, base_color, line_thickness)

            # Scaled ROI (note: numpy indexing is [row/y, col/x])
            roi_width = int(top_right_corner[0] - top_left_corner[0])
            roi_height = int(bottom_left_corner[1] - top_left_corner[1])
            overlay_roi = overlay[int(top_left_corner[1]):int(top_left_corner[1] + roi_height),
                                  int(top_left_corner[0]):int(top_left_corner[0] + roi_width)]

if obj.mask.is_init():
overlay_roi[obj.mask.numpy() != 0] = base_color

# Display Object label as text
position_image = get_image_position(obj.bounding_box_2d, img_scale)
text_position = (int(position_image[0] - 20), int(position_image[1] - 12))
text = "class " + str(obj.raw_label)
text_color = (255, 255, 255, 255)
cv2.putText(left_display, text, text_position, cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, text_color, 1)

            # Display object distance to camera as text
if np.isfinite(obj.position[2]):
text = str(round(abs(obj.position[2]), 1)) + "M"
text_position = (int(position_image[0] - 20), int(position_image[1]))
cv2.putText(left_display, text, text_position, cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, text_color, 1)

    # Here, overlay is the left image with an opaque mask over each detected object
cv2.addWeighted(left_display, 0.7, overlay, 0.3, 0.0, left_display)
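
# Typical call site for render_2D (a sketch; `zed`, `image_left`, `objects`,
# and `obj_runtime_param` come from the main sample loop):
#   zed.retrieve_image(image_left, sl.VIEW.LEFT)
#   zed.retrieve_objects(objects, obj_runtime_param)
#   render_2D(image_left.get_data(), img_scale, objects, is_tracking_on)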


# ----------------------------------------------------------------------
# 2D TRACKING VIEW
# ----------------------------------------------------------------------

class TrackingViewer:
def __init__(self, res, fps, D_max):
# Window size
self.window_width = res.width
self.window_height = res.height

# Visualisation settings
self.has_background_ready = False
self.background = np.full((self.window_height, self.window_width, 4), [245, 239, 239, 255], np.uint8)

        # Invert Z due to the Y axis of the OpenCV window
# Show objects between [z_min, 0] (z_min < 0)
self.z_min = -D_max
# Show objects between [x_min, x_max]
self.x_min = self.z_min
self.x_max = -self.x_min

# Conversion from world position to pixel coordinates
self.x_step = (self.x_max - self.x_min) / self.window_width
self.z_step = abs(self.z_min) / self.window_height
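        # Example: with D_max = 10 and a 400x400 window, x spans [-10, 10] m and
        # z spans [-10, 0] m, so x_step = 20/400 = 0.05 m/px and
        # z_step = 10/400 = 0.025 m/px.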

self.camera_calibration = sl.CalibrationParameters()

# List of alive tracks
self.tracklets = []

def set_camera_calibration(self, calib):
self.camera_calibration = calib
self.has_background_ready = False

def generate_view(self, objects, current_camera_pose, tracking_view, tracking_enabled):
# To get position in WORLD reference
for obj in objects.object_list:
pos = obj.position
tmp_pos = sl.Translation()
tmp_pos.init_vector(pos[0], pos[1], pos[2])
new_pos = (
tmp_pos * current_camera_pose.get_orientation()).get() + current_camera_pose.get_translation().get()
obj.position = np.array([new_pos[0], new_pos[1], new_pos[2]])

# Initialize visualisation
if not self.has_background_ready:
self.generate_background()

np.copyto(tracking_view, self.background, 'no')

if tracking_enabled:
# First add new points and remove the ones that are too old
current_timestamp = objects.timestamp.get_seconds()
self.add_to_tracklets(objects, current_timestamp)
self.prune_old_points(current_timestamp)

# Draw all tracklets
self.draw_tracklets(tracking_view, current_camera_pose)
else:
self.draw_points(objects.object_list, tracking_view, current_camera_pose)

def add_to_tracklets(self, objects, current_timestamp):
for obj in objects.object_list:
            if (obj.tracking_state != sl.OBJECT_TRACKING_STATE.OK
                    or not np.isfinite(obj.position[0])
                    or obj.id < 0):
                continue

new_object = True
for i in range(len(self.tracklets)):
if self.tracklets[i].id == obj.id:
new_object = False
self.tracklets[i].add_point(obj, current_timestamp)

            # In case this object does not belong to an existing track
            if new_object:
self.tracklets.append(Tracklet(obj, obj.label, current_timestamp))

    def prune_old_points(self, ts):
        track_to_delete = []
        for it in self.tracklets:
            # Drop tracks that have not been updated for more than 3 seconds
            if (ts - it.last_timestamp) > 3:
                track_to_delete.append(it)

for it in track_to_delete:
self.tracklets.remove(it)

# ----------------------------------------------------------------------
# Drawing functions
# ----------------------------------------------------------------------

def draw_points(self, objects, tracking_view, current_camera_pose):
for obj in objects:
if (not np.isfinite(obj.position[0])):
continue
clr = generate_color_id_u(obj.id)
pt = TrackPoint(obj.position)
cv_start_point = self.to_cv_point(pt.get_xyz(), current_camera_pose)
cv2.circle(tracking_view, (int(cv_start_point[0]), int(cv_start_point[1])), 6, clr, 2)

def draw_tracklets(self, tracking_view, current_camera_pose):
for track in self.tracklets:
clr = generate_color_id_u(track.id)
cv_start_point = self.to_cv_point(track.positions[0].get_xyz(), current_camera_pose)
for point_index in range(1, len(track.positions)):
cv_end_point = self.to_cv_point(track.positions[point_index].get_xyz(), current_camera_pose)
cv2.line(tracking_view, (int(cv_start_point[0]), int(cv_start_point[1])),
(int(cv_end_point[0]), int(cv_end_point[1])), clr, 3)
cv_start_point = cv_end_point
cv2.circle(tracking_view, (int(cv_start_point[0]), int(cv_start_point[1])), 6, clr, -1)

def generate_background(self):
camera_color = [255, 230, 204, 255]

# Get FOV intersection with window borders
fov = 2.0 * math.atan(
self.camera_calibration.left_cam.image_size.width / (2.0 * self.camera_calibration.left_cam.fx))

z_at_x_max = self.x_max / math.tan(fov / 2.0)
left_intersection_pt = self.to_cv_point(self.x_min, -z_at_x_max)
right_intersection_pt = self.to_cv_point(self.x_max, -z_at_x_max)

# Drawing camera
        camera_pts = np.array([left_intersection_pt,
                               right_intersection_pt,
                               [int(self.window_width / 2), self.window_height]],
                              dtype=np.int32)
cv2.fillConvexPoly(self.background, camera_pts, camera_color)

    def to_cv_point(self, x, z):
        # Dispatch on argument types: (float, float) maps world (x, z) metres
        # to pixels; (list, sl.Pose) and (TrackPoint, sl.Pose) first move the
        # position into the current camera frame.
        out = []
        if isinstance(x, float) and isinstance(z, float):
            out = [int((x - self.x_min) / self.x_step), int((z - self.z_min) / self.z_step)]
elif isinstance(x, list) and isinstance(z, sl.Pose):
# Go to camera current pose
rotation = z.get_rotation_matrix()
rotation.inverse()
tmp = x - (z.get_translation() * rotation.get_orientation()).get()
new_position = sl.Translation()
new_position.init_vector(tmp[0], tmp[1], tmp[2])
out = [int(((new_position.get()[0] - self.x_min) / self.x_step) + 0.5),
int(((new_position.get()[2] - self.z_min) / self.z_step) + 0.5)]
elif isinstance(x, TrackPoint) and isinstance(z, sl.Pose):
pos = x.get_xyz()
out = self.to_cv_point(pos, z)
else:
print("Unhandled argument type")
return out


class TrackPoint:
def __init__(self, pos_):
self.x = pos_[0]
self.y = pos_[1]
self.z = pos_[2]

def get_xyz(self):
return [self.x, self.y, self.z]


class Tracklet:
def __init__(self, obj_, type_, timestamp_):
self.id = obj_.id
self.object_type = type_
self.positions = deque()
self.add_point(obj_, timestamp_)

def add_point(self, obj_, timestamp_):
self.positions.append(TrackPoint(obj_.position))
self.last_timestamp = timestamp_
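
# Typical TrackingViewer usage (a sketch; assumes an opened camera with
# positional tracking and object detection enabled):
#   viewer = TrackingViewer(sl.Resolution(400, 400), camera_fps, D_max=10.0)
#   viewer.set_camera_calibration(zed.get_camera_information().camera_configuration.calibration_parameters)
#   zed.retrieve_objects(objects, obj_runtime_param)
#   zed.get_position(cam_w_pose, sl.REFERENCE_FRAME.WORLD)
#   viewer.generate_view(objects, cam_w_pose, track_view_np, tracking_enabled)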
38 changes: 38 additions & 0 deletions workflow-test/cv_viewer/utils.py
@@ -0,0 +1,38 @@
import cv2
import numpy as np
import pyzed.sl as sl

id_colors = [(232, 176, 59),
(175, 208, 25),
(102, 205, 105),
(185, 0, 255),
(99, 107, 252)]


def render_object(object_data, is_tracking_on):
if is_tracking_on:
return object_data.tracking_state == sl.OBJECT_TRACKING_STATE.OK
else:
return (object_data.tracking_state == sl.OBJECT_TRACKING_STATE.OK) or (
object_data.tracking_state == sl.OBJECT_TRACKING_STATE.OFF)


def generate_color_id_u(idx):
arr = []
if idx < 0:
arr = [236, 184, 36, 255]
else:
color_idx = idx % 5
arr = [id_colors[color_idx][0], id_colors[color_idx][1], id_colors[color_idx][2], 255]
return arr


def draw_vertical_line(left_display, start_pt, end_pt, clr, thickness):
    # Draw only the two end segments (one seventh of the edge each), leaving
    # the middle of the vertical edge open
    n_steps = 7
    pt1 = [((n_steps - 1) * start_pt[0] + end_pt[0]) / n_steps,
           ((n_steps - 1) * start_pt[1] + end_pt[1]) / n_steps]
    pt4 = [(start_pt[0] + (n_steps - 1) * end_pt[0]) / n_steps,
           (start_pt[1] + (n_steps - 1) * end_pt[1]) / n_steps]

cv2.line(left_display, (int(start_pt[0]), int(start_pt[1])), (int(pt1[0]), int(pt1[1])), clr, thickness)
cv2.line(left_display, (int(pt4[0]), int(pt4[1])), (int(end_pt[0]), int(end_pt[1])), clr, thickness)