diff --git a/bounding-box/src/lib.rs b/bounding-box/src/lib.rs index e0eccb0..7247a2b 100644 --- a/bounding-box/src/lib.rs +++ b/bounding-box/src/lib.rs @@ -3,8 +3,37 @@ pub mod nms; pub mod roi; use nalgebra::{Point, Point2, Point3, SVector, SimdPartialOrd, SimdValue}; -pub trait Num: num::Num + Copy + core::fmt::Debug + 'static {} -impl Num for T {} +pub trait Num: + num::Num + + core::ops::AddAssign + + core::ops::SubAssign + + core::ops::MulAssign + + core::ops::DivAssign + + core::cmp::PartialOrd + + core::cmp::PartialEq + + nalgebra::SimdPartialOrd + + nalgebra::SimdValue + + Copy + + core::fmt::Debug + + 'static +{ +} +impl< + T: num::Num + + core::ops::AddAssign + + core::ops::SubAssign + + core::ops::MulAssign + + core::ops::DivAssign + + core::cmp::PartialOrd + + core::cmp::PartialEq + + nalgebra::SimdPartialOrd + + nalgebra::SimdValue + + Copy + + core::fmt::Debug + + 'static, +> Num for T +{ +} /// An axis aligned bounding box in `D` dimensions, defined by the minimum vertex and a size vector. 
#[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -20,16 +49,26 @@ pub type Aabb2 = AxisAlignedBoundingBox; pub type Aabb3 = AxisAlignedBoundingBox; impl AxisAlignedBoundingBox { - pub fn new(point: Point, size: SVector) -> Self { + // Panics if max < min + pub fn new(min_point: Point, max_point: Point) -> Self { + if max_point < min_point { + panic!("max_point must be greater than or equal to min_point"); + } + Self::from_min_max_vertices(min_point, max_point) + } + pub fn try_new(min_point: Point, max_point: Point) -> Option { + if max_point < min_point { + return None; + } + Some(Self::from_min_max_vertices(min_point, max_point)) + } + pub fn new_point_size(point: Point, size: SVector) -> Self { Self { point, size } } - pub fn from_min_max_vertices(point1: Point, point2: Point) -> Self - where - T: core::ops::SubAssign, - { + pub fn from_min_max_vertices(point1: Point, point2: Point) -> Self { let size = point2 - point1; - Self::new(point1, SVector::from(size)) + Self::new_point_size(point1, SVector::from(size)) } /// Only considers the points closest and furthest from origin @@ -151,7 +190,21 @@ impl AxisAlignedBoundingBox { self.intersection(other) } - pub fn union(&self, other: &Self) -> Self + pub fn component_clamp(&self, min: T, max: T) -> Self + where + T: PartialOrd, + { + let mut this = *self; + this.point.iter_mut().for_each(|x| { + *x = nalgebra::clamp(*x, min, max); + }); + this.size.iter_mut().for_each(|x| { + *x = nalgebra::clamp(*x, min, max); + }); + this + } + + pub fn merge(&self, other: &Self) -> Self where T: core::ops::AddAssign, T: core::ops::SubAssign, @@ -168,6 +221,21 @@ impl AxisAlignedBoundingBox { Self::from_min_max_vertices(min, max) } + pub fn union(&self, other: &Self) -> T + where + T: core::ops::AddAssign, + T: core::ops::SubAssign, + T: core::ops::MulAssign, + T: PartialOrd, + T: nalgebra::SimdValue, + T: nalgebra::SimdPartialOrd, + { + self.measure() + other.measure() + - Self::intersection(self, other) + .map(|x| x.measure()) + 
.unwrap_or(T::zero()) + } + pub fn intersection(&self, other: &Self) -> Option where T: core::ops::AddAssign, @@ -176,21 +244,13 @@ impl AxisAlignedBoundingBox { T: nalgebra::SimdPartialOrd, T: nalgebra::SimdValue, { - let self_min = self.min_vertex(); - let self_max = self.max_vertex(); - let other_min = other.min_vertex(); - let other_max = other.max_vertex(); + let inter_min = self.min_vertex().inf(&other.min_vertex()); + let inter_max = self.max_vertex().sup(&other.max_vertex()); - if self_max < other_min || other_max < self_min { + if inter_max < inter_min { return None; // No intersection } - - let min = self_min.sup(&other_min); - let max = self_max.inf(&other_max); - Some(Self::from_min_max_vertices( - Point::from(min), - Point::from(max), - )) + Some(Self::new(inter_min, inter_max)) } pub fn denormalize(&self, factor: nalgebra::SVector) -> Self @@ -233,7 +293,7 @@ impl AxisAlignedBoundingBox { self.size.product() } - pub fn iou(&self, other: &Self) -> Option + pub fn iou(&self, other: &Self) -> T where T: core::ops::AddAssign, T: core::ops::SubAssign, @@ -242,9 +302,12 @@ impl AxisAlignedBoundingBox { T: nalgebra::SimdValue, T: core::ops::MulAssign, { - let intersection = self.intersection(other)?; + let intersection = self + .intersection(other) + .map(|v| v.measure()) + .unwrap_or(T::zero()); let union = self.union(other); - Some(intersection.measure() / union.measure()) + intersection / union } } @@ -257,13 +320,6 @@ impl Aabb2 { let point2 = Point2::new(x2, y2); Self::from_min_max_vertices(point1, point2) } - pub fn new_2d(point1: Point2, point2: Point2) -> Self - where - T: core::ops::SubAssign, - { - let size = point2.coords - point1.coords; - Self::new(point1, SVector::from(size)) - } pub fn x1y1(&self) -> Point2 { self.point } @@ -327,14 +383,6 @@ impl Aabb2 { } impl Aabb3 { - pub fn new_3d(point1: Point3, point2: Point3) -> Self - where - T: core::ops::SubAssign, - { - let size = point2.coords - point1.coords; - Self::new(point1, 
SVector::from(size)) - } - pub fn volume(&self) -> T where T: core::ops::MulAssign, @@ -349,7 +397,7 @@ fn test_bbox_new() { let point1 = Point2::new(1.0, 2.0); let point2 = Point2::new(4.0, 6.0); - let bbox = AxisAlignedBoundingBox::new_2d(point1, point2); + let bbox = AxisAlignedBoundingBox::new(point1, point2); assert_eq!(bbox.min_vertex(), point1); assert_eq!(bbox.size(), Vector2::new(3.0, 4.0)); @@ -414,7 +462,7 @@ fn test_bounding_box_contains_2d() { let point1 = Point2::new(1.0, 2.0); let point2 = Point2::new(4.0, 6.0); - let bbox = AxisAlignedBoundingBox::new_2d(point1, point2); + let bbox = AxisAlignedBoundingBox::new(point1, point2); assert!(bbox.contains_point(&Point2::new(2.0, 3.0))); assert!(!bbox.contains_point(&Point2::new(5.0, 7.0))); @@ -426,13 +474,13 @@ fn test_bounding_box_union_2d() { let point1 = Point2::new(1.0, 2.0); let point2 = Point2::new(4.0, 6.0); - let bbox1 = AxisAlignedBoundingBox::new_2d(point1, point2); + let bbox1 = AxisAlignedBoundingBox::new(point1, point2); let point3 = Point2::new(3.0, 5.0); let point4 = Point2::new(7.0, 8.0); - let bbox2 = AxisAlignedBoundingBox::new_2d(point3, point4); + let bbox2 = AxisAlignedBoundingBox::new(point3, point4); - let union_bbox = bbox1.union(&bbox2); + let union_bbox = bbox1.merge(&bbox2); assert_eq!(union_bbox.min_vertex(), Point2::new(1.0, 2.0)); assert_eq!(union_bbox.size(), Vector2::new(6.0, 6.0)); } @@ -443,11 +491,11 @@ fn test_bounding_box_intersection_2d() { let point1 = Point2::new(1.0, 2.0); let point2 = Point2::new(4.0, 6.0); - let bbox1 = AxisAlignedBoundingBox::new_2d(point1, point2); + let bbox1 = AxisAlignedBoundingBox::new(point1, point2); let point3 = Point2::new(3.0, 5.0); let point4 = Point2::new(5.0, 7.0); - let bbox2 = AxisAlignedBoundingBox::new_2d(point3, point4); + let bbox2 = AxisAlignedBoundingBox::new(point3, point4); let intersection_bbox = bbox1.intersection(&bbox2).unwrap(); assert_eq!(intersection_bbox.min_vertex(), Point2::new(3.0, 5.0)); @@ -460,7 +508,7 @@ fn 
test_bounding_box_contains_point() { let point1 = Point2::new(2, 3); let point2 = Point2::new(5, 4); - let bbox = AxisAlignedBoundingBox::new_2d(point1, point2); + let bbox = AxisAlignedBoundingBox::new(point1, point2); use itertools::Itertools; for (i, j) in (0..=10).cartesian_product(0..=10) { if bbox.contains_point(&Point2::new(i, j)) { diff --git a/bounding-box/src/nms.rs b/bounding-box/src/nms.rs index d2a652c..ff534d2 100644 --- a/bounding-box/src/nms.rs +++ b/bounding-box/src/nms.rs @@ -29,56 +29,71 @@ where + nalgebra::SimdValue + nalgebra::SimdPartialOrd, { - use itertools::Itertools; - - // Create vector of (index, box, score) tuples for boxes with scores above threshold - let mut indexed_boxes: Vec<(usize, &Aabb2, &T)> = boxes - .iter() - .enumerate() - .zip(scores.iter()) - .filter_map(|((idx, bbox), score)| { - if *score >= score_threshold { - Some((idx, bbox, score)) - } else { - None - } - }) + let mut indices: Vec = (0..boxes.len()) + .filter(|&i| scores[i] >= score_threshold) .collect(); - // Sort by score in descending order - indexed_boxes.sort_by(|(_, _, score_a), (_, _, score_b)| { - score_b - .partial_cmp(score_a) + indices.sort_by(|&i, &j| { + scores[j] + .partial_cmp(&scores[i]) .unwrap_or(std::cmp::Ordering::Equal) }); - let mut keep_indices = HashSet::new(); - let mut suppressed = HashSet::new(); + let mut selected_indices = HashSet::new(); - for (i, (idx_i, bbox_i, _)) in indexed_boxes.iter().enumerate() { - // Skip if this box is already suppressed - if suppressed.contains(idx_i) { - continue; - } + while let Some(&current) = indices.first() { + selected_indices.insert(current); + indices.remove(0); - // Keep this box - keep_indices.insert(*idx_i); - - // Compare with remaining boxes - for (idx_j, bbox_j, _) in indexed_boxes.iter().skip(i + 1) { - // Skip if this box is already suppressed - if suppressed.contains(idx_j) { - continue; - } - - // Calculate IoU and suppress if above threshold - if let Some(iou) = bbox_i.iou(bbox_j) { - if iou >= 
nms_threshold { - suppressed.insert(*idx_j); - } - } - } + indices.retain(|&i| { + // let iou = calculate_iou(&boxes[current], &boxes[i]); + let iou = boxes[current].iou(&boxes[i]); + iou < nms_threshold + }); } - keep_indices + selected_indices +} + +/// Calculate the Intersection over Union (IoU) between two bounding boxes. +/// +/// # Arguments +/// +/// * `box1` - The first bounding box. +/// * `box2` - The second bounding box. +/// +/// # Returns +/// +/// The IoU value as a floating-point number. +fn calculate_iou(box1: &Aabb2, box2: &Aabb2) -> T +where + T: Num + num::Float, + T: core::ops::MulAssign, + T: core::ops::AddAssign, + T: core::ops::SubAssign, + T: nalgebra::SimdValue, + T: nalgebra::SimdPartialOrd, +{ + // let inter_min_x = box1.min_vertex().x.max(box2.min_vertex().x); + // let inter_min_y = box1.min_vertex().y.max(box2.min_vertex().y); + // let inter_max_x = box1.maxs.x.min(box2.max_vertex().x); + // let inter_max_y = box1.maxs.y.min(box2.max_vertex().y); + let inter_min = box1.min_vertex().inf(&box2.min_vertex()); + let inter_max = box1.max_vertex().sup(&box2.max_vertex()); + + // let inter_width = (inter_max_x - inter_min_x).max(T::zero()); + // let inter_height = (inter_max_y - inter_min_y).max(T::zero()); + // let inter_width = (inter_max.x - inter_min.x).max(T::zero()); + // let inter_height = (inter_max.y - inter_min.y).max(T::zero()); + // let inter_area = inter_width * inter_height; + + let inter_area = Aabb2::new(inter_min, inter_max); + let inter_area_2 = box1.intersection(box2); + let union = box1.area() + box2.area() - inter_area.area(); + assert_eq!(Some(inter_area), inter_area_2); + assert_eq!(box1.union(&box2), union); + + let inter_area = inter_area.area(); + + inter_area / (box1.area() + box2.area() - inter_area) } diff --git a/src/facedet/retinaface.rs b/src/facedet/retinaface.rs index b10c040..2f4ea17 100644 --- a/src/facedet/retinaface.rs +++ b/src/facedet/retinaface.rs @@ -6,15 +6,77 @@ use nalgebra::{Point2, Vector2}; use 
ndarray_resize::NdFir; use std::path::Path; -pub struct FaceDetectionConfig {} +/// Configuration for face detection postprocessing +#[derive(Debug, Clone, PartialEq)] +pub struct FaceDetectionConfig { + /// Minimum confidence to keep a detection + pub threshold: f32, + /// NMS threshold for suppressing overlapping boxes + pub nms_threshold: f32, + /// Variances for bounding box decoding + pub variances: [f32; 2], + /// The step size (stride) for each feature map + pub steps: Vec, + /// The minimum anchor sizes for each feature map + pub min_sizes: Vec>, + /// Whether to clip bounding boxes to the image dimensions + pub clamp: bool, + /// Input image width (used for anchor generation) + pub input_width: usize, + /// Input image height (used for anchor generation) + pub input_height: usize, +} -impl FaceDetectionConfig {} +impl FaceDetectionConfig { + pub fn with_threshold(mut self, threshold: f32) -> Self { + self.threshold = threshold; + self + } + pub fn with_nms_threshold(mut self, nms_threshold: f32) -> Self { + self.nms_threshold = nms_threshold; + self + } + pub fn with_variances(mut self, variances: [f32; 2]) -> Self { + self.variances = variances; + self + } + pub fn with_steps(mut self, steps: Vec) -> Self { + self.steps = steps; + self + } + pub fn with_min_sizes(mut self, min_sizes: Vec>) -> Self { + self.min_sizes = min_sizes; + self + } + pub fn with_clip(mut self, clip: bool) -> Self { + self.clamp = clip; + self + } + pub fn with_input_width(mut self, input_width: usize) -> Self { + self.input_width = input_width; + self + } + pub fn with_input_height(mut self, input_height: usize) -> Self { + self.input_height = input_height; + self + } +} impl Default for FaceDetectionConfig { fn default() -> Self { - FaceDetectionConfig {} + Self { + threshold: 0.5, + nms_threshold: 0.4, + variances: [0.1, 0.2], + steps: vec![8, 16, 32], + min_sizes: vec![vec![16, 32], vec![64, 128], vec![256, 512]], + clamp: true, + input_width: 1024, + input_height: 1024, + } } 
} + pub struct FaceDetection { handle: mnn_sync::SessionHandle, } @@ -50,8 +112,112 @@ pub struct FaceDetectionOutput { pub landmark: Vec, } +fn generate_anchors(config: &FaceDetectionConfig) -> ndarray::Array2 { + let mut anchors = Vec::new(); + let feature_maps: Vec<(usize, usize)> = config + .steps + .iter() + .map(|&step| { + ( + (config.input_height as f32 / step as f32).ceil() as usize, + (config.input_width as f32 / step as f32).ceil() as usize, + ) + }) + .collect(); + + for (k, f) in feature_maps.iter().enumerate() { + let min_sizes = &config.min_sizes[k]; + for i in 0..f.0 { + for j in 0..f.1 { + for &min_size in min_sizes { + let s_kx = min_size as f32 / config.input_width as f32; + let s_ky = min_size as f32 / config.input_height as f32; + let dense_cx = + (j as f32 + 0.5) * config.steps[k] as f32 / config.input_width as f32; + let dense_cy = + (i as f32 + 0.5) * config.steps[k] as f32 / config.input_height as f32; + anchors.push([ + dense_cx - s_kx / 2., + dense_cy - s_ky / 2., + dense_cx + s_kx / 2., + dense_cy + s_ky / 2., + ]); + } + } + } + } + + ndarray::Array2::from_shape_vec((anchors.len(), 4), anchors.into_iter().flatten().collect()) + .unwrap() +} + impl FaceDetectionModelOutput { pub fn postprocess(self, config: &FaceDetectionConfig) -> Result { + use ndarray::s; + + let priors = generate_anchors(config); + + let scores = self.confidence.slice(s![0, .., 1]); + let boxes = self.bbox.slice(s![0, .., ..]); + let landmarks_raw = self.landmark.slice(s![0, .., ..]); + + let mut decoded_boxes = Vec::new(); + let mut decoded_landmarks = Vec::new(); + let mut confidences = Vec::new(); + + for i in 0..priors.shape()[0] { + if scores[i] > config.threshold { + let prior = priors.row(i); + let loc = boxes.row(i); + let landm = landmarks_raw.row(i); + + // Decode bounding box + let prior_cx = (prior[0] + prior[2]) / 2.0; + let prior_cy = (prior[1] + prior[3]) / 2.0; + let prior_w = prior[2] - prior[0]; + let prior_h = prior[3] - prior[1]; + + let var = 
config.variances; + let cx = prior_cx + loc[0] * var[0] * prior_w; + let cy = prior_cy + loc[1] * var[0] * prior_h; + let w = prior_w * (loc[2] * var[1]).exp(); + let h = prior_h * (loc[3] * var[1]).exp(); + + let xmin = cx - w / 2.0; + let ymin = cy - h / 2.0; + let xmax = cx + w / 2.0; + let ymax = cy + h / 2.0; + + let mut bbox = + Aabb2::from_min_max_vertices(Point2::new(xmin, ymin), Point2::new(xmax, ymax)); + if config.clamp { + bbox.component_clamp(0.0, 1.0); + } + decoded_boxes.push(bbox); + + // Decode landmarks + let mut points = [Point2::new(0.0, 0.0); 5]; + for j in 0..5 { + points[j].x = prior_cx + landm[j * 2] * var[0] * prior_w; + points[j].y = prior_cy + landm[j * 2 + 1] * var[0] * prior_h; + } + let landmarks = FaceLandmarks { + left_eye: points[0], + right_eye: points[1], + nose: points[2], + left_mouth: points[3], + right_mouth: points[4], + }; + decoded_landmarks.push(landmarks); + confidences.push(scores[i]); + } + } + + Ok(FaceDetectionProcessedOutput { + bbox: decoded_boxes, + confidence: confidences, + landmarks: decoded_landmarks, + }) } }