From 42ac210bba726614b83e63477d313430258bd377 Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Tue, 5 Aug 2025 14:39:16 +0530 Subject: [PATCH] fix: A lot of fixes relating to bounding-boxes --- bounding-box/src/lib.rs | 96 +++++++++++++++++++++++---------------- bounding-box/src/nms.rs | 9 +++- src/facedet/retinaface.rs | 35 ++++++++++++++ src/main.rs | 6 +-- 4 files changed, 104 insertions(+), 42 deletions(-) diff --git a/bounding-box/src/lib.rs b/bounding-box/src/lib.rs index a91262f..e0eccb0 100644 --- a/bounding-box/src/lib.rs +++ b/bounding-box/src/lib.rs @@ -2,7 +2,7 @@ pub mod draw; pub mod nms; pub mod roi; -use nalgebra::{Point, Point2, Point3, SVector}; +use nalgebra::{Point, Point2, Point3, SVector, SimdPartialOrd, SimdValue}; pub trait Num: num::Num + Copy + core::fmt::Debug + 'static {} impl Num for T {} @@ -85,10 +85,7 @@ impl AxisAlignedBoundingBox { self.point += translation; } - pub fn min_vertex(&self) -> Point - where - T: core::ops::SubAssign, - { + pub fn min_vertex(&self) -> Point { self.point } @@ -140,16 +137,18 @@ impl AxisAlignedBoundingBox { other_min >= self_min && other_max <= self_max } - pub fn clamp(&self, other: &Self) -> Self + pub fn clamp(&self, other: &Self) -> Option where T: core::ops::AddAssign, T: core::ops::SubAssign, T: PartialOrd, + T: nalgebra::SimdPartialOrd, + T: nalgebra::SimdValue, { - if self.contains_bbox(other) { - return *other; + if other.contains_bbox(self) { + return Some(*self); } - todo!() + self.intersection(other) } pub fn union(&self, other: &Self) -> Self @@ -157,18 +156,16 @@ impl AxisAlignedBoundingBox { T: core::ops::AddAssign, T: core::ops::SubAssign, T: PartialOrd, + T: nalgebra::SimdValue, + T: nalgebra::SimdPartialOrd, { let self_min = self.min_vertex(); let self_max = self.max_vertex(); let other_min = other.min_vertex(); let other_max = other.max_vertex(); - let max_of_min = (self_min.coords < other_min.coords) - .then_some(self_min.coords) - .unwrap_or(other_min.coords); - let min_of_max = (self_max.coords > other_max.coords) - .then_some(self_max.coords) - .unwrap_or(other_max.coords); - Self::from_min_max_vertices(Point::from(max_of_min), Point::from(min_of_max)) + let min = self_min.inf(&other_min); + let max = self_max.sup(&other_max); + Self::from_min_max_vertices(min, max) } pub fn intersection(&self, other: &Self) -> Option @@ -176,6 +173,8 @@ impl AxisAlignedBoundingBox { T: core::ops::AddAssign, T: core::ops::SubAssign, T: PartialOrd, + T: nalgebra::SimdPartialOrd, + T: nalgebra::SimdValue, { let self_min = self.min_vertex(); let self_max = self.max_vertex(); @@ -186,12 +185,8 @@ impl AxisAlignedBoundingBox { return None; // No intersection } - let min = (self_min.coords > other_min.coords) - .then_some(self_min.coords) - .unwrap_or(other_min.coords); - let max = (self_max.coords < other_max.coords) - .then_some(self_max.coords) - .unwrap_or(other_max.coords); + let min = self_min.sup(&other_min); + let max = self_max.inf(&other_max); Some(Self::from_min_max_vertices( Point::from(min), Point::from(max), @@ -211,7 +206,7 @@ impl AxisAlignedBoundingBox { } } - pub fn cast(&self) -> Option> + pub fn try_cast(&self) -> Option> where // T: num::NumCast, T2: Num + simba::scalar::SubsetOf, @@ -231,6 +226,26 @@ impl AxisAlignedBoundingBox { // size: self.size.as_(), // }) // } + pub fn measure(&self) -> T + where + T: core::ops::MulAssign, + { + self.size.product() + } + + pub fn iou(&self, other: &Self) -> Option + where + T: core::ops::AddAssign, + T: core::ops::SubAssign, + T: PartialOrd, + T: nalgebra::SimdPartialOrd, + T: nalgebra::SimdValue, + T: core::ops::MulAssign, + { + let intersection = self.intersection(other)?; + let union = self.union(other); + Some(intersection.measure() / union.measure()) + } } impl Aabb2 { @@ -305,20 +320,9 @@ impl Aabb2 { pub fn area(&self) -> T where - T: core::ops::Mul, + T: core::ops::MulAssign, { - self.size.x * self.size.y - } - - pub fn iou(&self, other: &Self) -> Option - where - T: core::ops::AddAssign, - T: core::ops::SubAssign, - T: PartialOrd, - { - let intersection = self.intersection(other)?; - let union = self.union(other); - Some(intersection.area() / union.area()) + self.measure() } } @@ -333,9 +337,9 @@ impl Aabb3 { pub fn volume(&self) -> T where - T: core::ops::Mul, + T: core::ops::MulAssign, { - self.size.x * self.size.y * self.size.z + self.measure() } } @@ -476,3 +480,19 @@ fn test_bounding_box_contains_point() { } } } + +#[test] +fn test_bounding_box_clamp_box_2d() { + let bbox1 = Aabb2::from_x1y1x2y2(1, 1, 4, 4); + let bbox2 = Aabb2::from_x1y1x2y2(2, 2, 3, 3); + let clamped = bbox2.clamp(&bbox1).unwrap(); + assert_eq!(bbox2, clamped); + let clamped = bbox1.clamp(&bbox2).unwrap(); + assert_eq!(bbox2, clamped); + + let bbox1 = Aabb2::from_x1y1x2y2(4, 5, 7, 8); + let bbox2 = Aabb2::from_x1y1x2y2(5, 4, 8, 7); + let clamped = bbox1.clamp(&bbox2).unwrap(); + let expected = Aabb2::from_x1y1x2y2(5, 5, 7, 7); + assert_eq!(clamped, expected) +} diff --git a/bounding-box/src/nms.rs b/bounding-box/src/nms.rs index fe0dc5b..04a051f 100644 --- a/bounding-box/src/nms.rs +++ b/bounding-box/src/nms.rs @@ -18,7 +18,14 @@ pub fn nms( nms_threshold: T, ) -> Vec> where - T: Num + num::Float + core::iter::Product + core::ops::AddAssign + core::ops::SubAssign, + T: Num + + num::Float + + core::iter::Product + + core::ops::AddAssign + + core::ops::SubAssign + + core::ops::MulAssign + + nalgebra::SimdValue + + nalgebra::SimdPartialOrd, { use itertools::Itertools; let bboxes: Vec<_> = boxes diff --git a/src/facedet/retinaface.rs b/src/facedet/retinaface.rs index e8ca9c7..d0afe5d 100644 --- a/src/facedet/retinaface.rs +++ b/src/facedet/retinaface.rs @@ -62,9 +62,18 @@ pub struct FaceDetectionModelOutput { pub landmark: ndarray::Array3, } +/// Represents the 5 facial landmarks detected by RetinaFace +pub struct FaceLandmarks { + pub left_eye: Point2, + pub right_eye: Point2, + pub nose: Point2, + pub left_mouth: Point2, + pub right_mouth: Point2, +} pub struct FaceDetectionProcessedOutput { pub bbox: Vec>, pub confidence: Vec, + pub landmarks: Vec, } impl FaceDetectionModelOutput { @@ -87,10 +96,12 @@ impl FaceDetectionModelOutput { } let mut boxes = Vec::new(); let mut scores = Vec::new(); + let mut landmarks = Vec::new(); let var0 = config.variance[0]; let var1 = config.variance[1]; let bbox_data = self.bbox; let conf_data = self.confidence; + let landmark_data = self.landmark; let num_priors = bbox_data.shape()[1]; for idx in 0..num_priors { let dx = bbox_data[[0, idx, 0]]; @@ -110,11 +121,35 @@ impl FaceDetectionModelOutput { if score > config.threshold { boxes.push(Aabb2::from_x1y1x2y2(x_min, y_min, x_max, y_max)); scores.push(score); + + let left_eye_x = landmark_data[[0, idx, 0]] * anchor_w * var0 + anchor_cx; + let left_eye_y = landmark_data[[0, idx, 1]] * anchor_h * var0 + anchor_cy; + + let right_eye_x = landmark_data[[0, idx, 2]] * anchor_w * var0 + anchor_cx; + let right_eye_y = landmark_data[[0, idx, 3]] * anchor_h * var0 + anchor_cy; + + let nose_x = landmark_data[[0, idx, 4]] * anchor_w * var0 + anchor_cx; + let nose_y = landmark_data[[0, idx, 5]] * anchor_h * var0 + anchor_cy; + + let left_mouth_x = landmark_data[[0, idx, 6]] * anchor_w * var0 + anchor_cx; + let left_mouth_y = landmark_data[[0, idx, 7]] * anchor_h * var0 + anchor_cy; + + let right_mouth_x = landmark_data[[0, idx, 8]] * anchor_w * var0 + anchor_cx; + let right_mouth_y = landmark_data[[0, idx, 9]] * anchor_h * var0 + anchor_cy; + + landmarks.push(FaceLandmarks { + left_eye: Point2::new(left_eye_x, left_eye_y), + right_eye: Point2::new(right_eye_x, right_eye_y), + nose: Point2::new(nose_x, nose_y), + left_mouth: Point2::new(left_mouth_x, left_mouth_y), + right_mouth: Point2::new(right_mouth_x, right_mouth_y), + }); } } Ok(FaceDetectionProcessedOutput { bbox: boxes, confidence: scores, + landmarks, }) } } diff --git a/src/main.rs b/src/main.rs index ab5922a..56973bb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,11 +29,11 @@ pub fn main() -> Result<()> { .change_context(errors::Error) .attach_printable("Failed to detect faces")?; // output.print(20); - let aabbs = output + let faces = output .postprocess(FaceDetectionConfig::default().with_threshold(detect.threshold)) .change_context(errors::Error) .attach_printable("Failed to attach context")?; - for bbox in aabbs { + for bbox in faces.bbox { tracing::info!("Detected face: {:?}", bbox); use bounding_box::draw::*; let bbox = bbox @@ -41,7 +41,7 @@ pub fn main() -> Result<()> { array.shape()[1] as f32, array.shape()[0] as f32, )) - .cast() + .try_cast() .ok_or(errors::Error) .attach_printable("Failed to cast f32 to usize")?; array.draw(bbox, color::palette::css::GREEN_YELLOW.to_rgba8(), 10);