fix: A lot of fixes relating to bounding-boxes

This commit is contained in:
uttarayan21
2025-08-05 14:39:16 +05:30
parent 561fb2a924
commit 42ac210bba
4 changed files with 104 additions and 42 deletions

View File

@@ -2,7 +2,7 @@ pub mod draw;
pub mod nms; pub mod nms;
pub mod roi; pub mod roi;
use nalgebra::{Point, Point2, Point3, SVector}; use nalgebra::{Point, Point2, Point3, SVector, SimdPartialOrd, SimdValue};
pub trait Num: num::Num + Copy + core::fmt::Debug + 'static {} pub trait Num: num::Num + Copy + core::fmt::Debug + 'static {}
impl<T: num::Num + Copy + core::fmt::Debug + 'static> Num for T {} impl<T: num::Num + Copy + core::fmt::Debug + 'static> Num for T {}
@@ -85,10 +85,7 @@ impl<T: Num, const D: usize> AxisAlignedBoundingBox<T, D> {
self.point += translation; self.point += translation;
} }
pub fn min_vertex(&self) -> Point<T, D> pub fn min_vertex(&self) -> Point<T, D> {
where
T: core::ops::SubAssign,
{
self.point self.point
} }
@@ -140,16 +137,18 @@ impl<T: Num, const D: usize> AxisAlignedBoundingBox<T, D> {
other_min >= self_min && other_max <= self_max other_min >= self_min && other_max <= self_max
} }
pub fn clamp(&self, other: &Self) -> Self pub fn clamp(&self, other: &Self) -> Option<Self>
where where
T: core::ops::AddAssign, T: core::ops::AddAssign,
T: core::ops::SubAssign, T: core::ops::SubAssign,
T: PartialOrd, T: PartialOrd,
T: nalgebra::SimdPartialOrd,
T: nalgebra::SimdValue,
{ {
if self.contains_bbox(other) { if other.contains_bbox(self) {
return *other; return Some(*self);
} }
todo!() self.intersection(other)
} }
pub fn union(&self, other: &Self) -> Self pub fn union(&self, other: &Self) -> Self
@@ -157,18 +156,16 @@ impl<T: Num, const D: usize> AxisAlignedBoundingBox<T, D> {
T: core::ops::AddAssign, T: core::ops::AddAssign,
T: core::ops::SubAssign, T: core::ops::SubAssign,
T: PartialOrd, T: PartialOrd,
T: nalgebra::SimdValue,
T: nalgebra::SimdPartialOrd,
{ {
let self_min = self.min_vertex(); let self_min = self.min_vertex();
let self_max = self.max_vertex(); let self_max = self.max_vertex();
let other_min = other.min_vertex(); let other_min = other.min_vertex();
let other_max = other.max_vertex(); let other_max = other.max_vertex();
let max_of_min = (self_min.coords < other_min.coords) let min = self_min.inf(&other_min);
.then_some(self_min.coords) let max = self_max.sup(&other_max);
.unwrap_or(other_min.coords); Self::from_min_max_vertices(min, max)
let min_of_max = (self_max.coords > other_max.coords)
.then_some(self_max.coords)
.unwrap_or(other_max.coords);
Self::from_min_max_vertices(Point::from(max_of_min), Point::from(min_of_max))
} }
pub fn intersection(&self, other: &Self) -> Option<Self> pub fn intersection(&self, other: &Self) -> Option<Self>
@@ -176,6 +173,8 @@ impl<T: Num, const D: usize> AxisAlignedBoundingBox<T, D> {
T: core::ops::AddAssign, T: core::ops::AddAssign,
T: core::ops::SubAssign, T: core::ops::SubAssign,
T: PartialOrd, T: PartialOrd,
T: nalgebra::SimdPartialOrd,
T: nalgebra::SimdValue,
{ {
let self_min = self.min_vertex(); let self_min = self.min_vertex();
let self_max = self.max_vertex(); let self_max = self.max_vertex();
@@ -186,12 +185,8 @@ impl<T: Num, const D: usize> AxisAlignedBoundingBox<T, D> {
return None; // No intersection return None; // No intersection
} }
let min = (self_min.coords > other_min.coords) let min = self_min.sup(&other_min);
.then_some(self_min.coords) let max = self_max.inf(&other_max);
.unwrap_or(other_min.coords);
let max = (self_max.coords < other_max.coords)
.then_some(self_max.coords)
.unwrap_or(other_max.coords);
Some(Self::from_min_max_vertices( Some(Self::from_min_max_vertices(
Point::from(min), Point::from(min),
Point::from(max), Point::from(max),
@@ -211,7 +206,7 @@ impl<T: Num, const D: usize> AxisAlignedBoundingBox<T, D> {
} }
} }
pub fn cast<T2>(&self) -> Option<Aabb<T2, D>> pub fn try_cast<T2>(&self) -> Option<Aabb<T2, D>>
where where
// T: num::NumCast, // T: num::NumCast,
T2: Num + simba::scalar::SubsetOf<T>, T2: Num + simba::scalar::SubsetOf<T>,
@@ -231,6 +226,26 @@ impl<T: Num, const D: usize> AxisAlignedBoundingBox<T, D> {
// size: self.size.as_(), // size: self.size.as_(),
// }) // })
// } // }
/// Returns the `D`-dimensional measure of the box: the product of its
/// extents along every axis (e.g. the area of a 2-D box or the volume of a
/// 3-D box).
pub fn measure(&self) -> T
where
    T: core::ops::MulAssign,
{
    // `size` stores one extent per axis; multiplying them all together
    // yields the measure.
    let extents = &self.size;
    extents.product()
}
pub fn iou(&self, other: &Self) -> Option<T>
where
T: core::ops::AddAssign,
T: core::ops::SubAssign,
T: PartialOrd,
T: nalgebra::SimdPartialOrd,
T: nalgebra::SimdValue,
T: core::ops::MulAssign,
{
let intersection = self.intersection(other)?;
let union = self.union(other);
Some(intersection.measure() / union.measure())
}
} }
impl<T: Num> Aabb2<T> { impl<T: Num> Aabb2<T> {
@@ -305,20 +320,9 @@ impl<T: Num> Aabb2<T> {
pub fn area(&self) -> T pub fn area(&self) -> T
where where
T: core::ops::Mul<Output = T>, T: core::ops::MulAssign,
{ {
self.size.x * self.size.y self.measure()
}
pub fn iou(&self, other: &Self) -> Option<T>
where
T: core::ops::AddAssign,
T: core::ops::SubAssign,
T: PartialOrd,
{
let intersection = self.intersection(other)?;
let union = self.union(other);
Some(intersection.area() / union.area())
} }
} }
@@ -333,9 +337,9 @@ impl<T: Num> Aabb3<T> {
pub fn volume(&self) -> T pub fn volume(&self) -> T
where where
T: core::ops::Mul<Output = T>, T: core::ops::MulAssign,
{ {
self.size.x * self.size.y * self.size.z self.measure()
} }
} }
@@ -476,3 +480,19 @@ fn test_bounding_box_contains_point() {
} }
} }
} }
#[test]
fn test_bounding_box_clamp_box_2d() {
    // Nested boxes: `inner` lies entirely within `outer`, so clamping in
    // either direction is expected to produce `inner`.
    let outer = Aabb2::from_x1y1x2y2(1, 1, 4, 4);
    let inner = Aabb2::from_x1y1x2y2(2, 2, 3, 3);
    assert_eq!(inner, inner.clamp(&outer).unwrap());
    assert_eq!(inner, outer.clamp(&inner).unwrap());

    // Partially overlapping boxes: the clamp is expected to be the region
    // the two boxes share.
    let lhs = Aabb2::from_x1y1x2y2(4, 5, 7, 8);
    let rhs = Aabb2::from_x1y1x2y2(5, 4, 8, 7);
    let shared = Aabb2::from_x1y1x2y2(5, 5, 7, 7);
    assert_eq!(lhs.clamp(&rhs).unwrap(), shared);
}

View File

@@ -18,7 +18,14 @@ pub fn nms<T>(
nms_threshold: T, nms_threshold: T,
) -> Vec<Aabb2<T>> ) -> Vec<Aabb2<T>>
where where
T: Num + num::Float + core::iter::Product<T> + core::ops::AddAssign + core::ops::SubAssign, T: Num
+ num::Float
+ core::iter::Product<T>
+ core::ops::AddAssign
+ core::ops::SubAssign
+ core::ops::MulAssign
+ nalgebra::SimdValue
+ nalgebra::SimdPartialOrd,
{ {
use itertools::Itertools; use itertools::Itertools;
let bboxes: Vec<_> = boxes let bboxes: Vec<_> = boxes

View File

@@ -62,9 +62,18 @@ pub struct FaceDetectionModelOutput {
pub landmark: ndarray::Array3<f32>, pub landmark: ndarray::Array3<f32>,
} }
/// Represents the 5 facial landmarks detected by RetinaFace.
///
/// Each landmark is an `(x, y)` point. NOTE(review): the points appear to
/// share the coordinate space of the decoded bounding boxes (they are offset
/// from the same anchor centres) — confirm whether that is normalised or
/// pixel space. Whether "left"/"right" is from the viewer's or the subject's
/// perspective is not established here — TODO confirm against the model.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct FaceLandmarks {
    /// Left eye position.
    pub left_eye: Point2<f32>,
    /// Right eye position.
    pub right_eye: Point2<f32>,
    /// Nose position.
    pub nose: Point2<f32>,
    /// Left mouth position (presumably the mouth corner — verify).
    pub left_mouth: Point2<f32>,
    /// Right mouth position (presumably the mouth corner — verify).
    pub right_mouth: Point2<f32>,
}
pub struct FaceDetectionProcessedOutput { pub struct FaceDetectionProcessedOutput {
pub bbox: Vec<Aabb2<f32>>, pub bbox: Vec<Aabb2<f32>>,
pub confidence: Vec<f32>, pub confidence: Vec<f32>,
pub landmarks: Vec<FaceLandmarks>,
} }
impl FaceDetectionModelOutput { impl FaceDetectionModelOutput {
@@ -87,10 +96,12 @@ impl FaceDetectionModelOutput {
} }
let mut boxes = Vec::new(); let mut boxes = Vec::new();
let mut scores = Vec::new(); let mut scores = Vec::new();
let mut landmarks = Vec::new();
let var0 = config.variance[0]; let var0 = config.variance[0];
let var1 = config.variance[1]; let var1 = config.variance[1];
let bbox_data = self.bbox; let bbox_data = self.bbox;
let conf_data = self.confidence; let conf_data = self.confidence;
let landmark_data = self.landmark;
let num_priors = bbox_data.shape()[1]; let num_priors = bbox_data.shape()[1];
for idx in 0..num_priors { for idx in 0..num_priors {
let dx = bbox_data[[0, idx, 0]]; let dx = bbox_data[[0, idx, 0]];
@@ -110,11 +121,35 @@ impl FaceDetectionModelOutput {
if score > config.threshold { if score > config.threshold {
boxes.push(Aabb2::from_x1y1x2y2(x_min, y_min, x_max, y_max)); boxes.push(Aabb2::from_x1y1x2y2(x_min, y_min, x_max, y_max));
scores.push(score); scores.push(score);
let left_eye_x = landmark_data[[0, idx, 0]] * anchor_w * var0 + anchor_cx;
let left_eye_y = landmark_data[[0, idx, 1]] * anchor_h * var0 + anchor_cy;
let right_eye_x = landmark_data[[0, idx, 2]] * anchor_w * var0 + anchor_cx;
let right_eye_y = landmark_data[[0, idx, 3]] * anchor_h * var0 + anchor_cy;
let nose_x = landmark_data[[0, idx, 4]] * anchor_w * var0 + anchor_cx;
let nose_y = landmark_data[[0, idx, 5]] * anchor_h * var0 + anchor_cy;
let left_mouth_x = landmark_data[[0, idx, 6]] * anchor_w * var0 + anchor_cx;
let left_mouth_y = landmark_data[[0, idx, 7]] * anchor_h * var0 + anchor_cy;
let right_mouth_x = landmark_data[[0, idx, 8]] * anchor_w * var0 + anchor_cx;
let right_mouth_y = landmark_data[[0, idx, 9]] * anchor_h * var0 + anchor_cy;
landmarks.push(FaceLandmarks {
left_eye: Point2::new(left_eye_x, left_eye_y),
right_eye: Point2::new(right_eye_x, right_eye_y),
nose: Point2::new(nose_x, nose_y),
left_mouth: Point2::new(left_mouth_x, left_mouth_y),
right_mouth: Point2::new(right_mouth_x, right_mouth_y),
});
} }
} }
Ok(FaceDetectionProcessedOutput { Ok(FaceDetectionProcessedOutput {
bbox: boxes, bbox: boxes,
confidence: scores, confidence: scores,
landmarks,
}) })
} }
} }

View File

@@ -29,11 +29,11 @@ pub fn main() -> Result<()> {
.change_context(errors::Error) .change_context(errors::Error)
.attach_printable("Failed to detect faces")?; .attach_printable("Failed to detect faces")?;
// output.print(20); // output.print(20);
let aabbs = output let faces = output
.postprocess(FaceDetectionConfig::default().with_threshold(detect.threshold)) .postprocess(FaceDetectionConfig::default().with_threshold(detect.threshold))
.change_context(errors::Error) .change_context(errors::Error)
.attach_printable("Failed to attach context")?; .attach_printable("Failed to attach context")?;
for bbox in aabbs { for bbox in faces.bbox {
tracing::info!("Detected face: {:?}", bbox); tracing::info!("Detected face: {:?}", bbox);
use bounding_box::draw::*; use bounding_box::draw::*;
let bbox = bbox let bbox = bbox
@@ -41,7 +41,7 @@ pub fn main() -> Result<()> {
array.shape()[1] as f32, array.shape()[1] as f32,
array.shape()[0] as f32, array.shape()[0] as f32,
)) ))
.cast() .try_cast()
.ok_or(errors::Error) .ok_or(errors::Error)
.attach_printable("Failed to cast f32 to usize")?; .attach_printable("Failed to cast f32 to usize")?;
array.draw(bbox, color::palette::css::GREEN_YELLOW.to_rgba8(), 10); array.draw(bbox, color::palette::css::GREEN_YELLOW.to_rgba8(), 10);