feat: output aabbs from retinaface

This commit is contained in:
uttarayan21
2025-08-04 11:43:16 +05:30
parent c751654799
commit f55f0ab089
4 changed files with 220 additions and 182 deletions

213
src/facedet/retinaface.rs Normal file
View File

@@ -0,0 +1,213 @@
use crate::errors::*;
use bounding_box::Aabb2;
use error_stack::ResultExt;
use mnn_bridge::ndarray::*;
use nalgebra::{Point2, Vector2};
use ndarray_resize::NdFir;
use std::path::Path;
/// Prior-box settings consumed by `FaceDetectionModelOutput::postprocess`.
pub struct FaceDetectionConfig {
/// One pair of prior box side lengths per entry of `steps`; each length is
/// divided by 640 when the priors are generated.
min_sizes: Vec<Vector2<usize>>,
/// One stride per feature level; `640 / step` is used as that level's grid size.
steps: Vec<usize>,
/// Scaling factors applied to the predicted offsets: index 0 scales the
/// centre offsets, index 1 scales the width/height offsets.
variance: Vec<f32>,
}
impl Default for FaceDetectionConfig {
    /// Builds the default prior-box settings: three feature levels with
    /// strides 8, 16 and 32, two prior sizes per level, and the variance pair
    /// `[0.1, 0.2]`.
    fn default() -> Self {
        // One (first, second) prior size pair per stride listed below.
        let min_sizes = [(16, 32), (64, 128), (256, 512)]
            .map(|(first, second)| Vector2::new(first, second))
            .to_vec();
        Self {
            min_sizes,
            steps: vec![8, 16, 32],
            variance: vec![0.1, 0.2],
        }
    }
}
/// Face detector that runs a model through an MNN session.
pub struct FaceDetection {
/// Session handle created in `new_from_bytes` and used by `detect_faces`
/// to run the model.
handle: mnn_sync::SessionHandle,
}
/// Raw tensors copied out of the model by `FaceDetection::detect_faces`.
pub struct FaceDetectionModelOutput {
/// Contents of the "bbox" output; `postprocess` reads four values per row,
/// indexed as `[0, row, 0..4]`.
pub bbox: ndarray::Array3<f32>,
/// Contents of the "confidence" output; `postprocess` and `print` read the
/// value at `[0, row, 1]` as the row's score.
pub confidence: ndarray::Array3<f32>,
/// Contents of the "landmark" output; neither `postprocess` nor `print`
/// reads it.
pub landmark: ndarray::Array3<f32>,
}
impl FaceDetectionModelOutput {
pub fn postprocess(self, config: FaceDetectionConfig) -> Result<Vec<Aabb2<f32>>> {
let mut anchors = Vec::new();
for (k, &step) in config.steps.iter().enumerate() {
let feature_size = 640 / step;
let min_sizes = config.min_sizes[k];
let sizes = [min_sizes.x, min_sizes.y];
for i in 0..feature_size {
for j in 0..feature_size {
for &size in &sizes {
let cx = (j as f32 + 0.5) * step as f32 / 640.0;
let cy = (i as f32 + 0.5) * step as f32 / 640.0;
let s_k = size as f32 / 640.0;
anchors.push((cx, cy, s_k, s_k));
}
}
}
}
let mut boxes = Vec::new();
let var0 = config.variance[0];
let var1 = config.variance[1];
let bbox_data = self.bbox;
let conf_data = self.confidence;
let num_priors = bbox_data.shape()[1];
for idx in 0..num_priors {
let dx = bbox_data[[0, idx, 0]];
let dy = bbox_data[[0, idx, 1]];
let dw = bbox_data[[0, idx, 2]];
let dh = bbox_data[[0, idx, 3]];
let (anchor_cx, anchor_cy, anchor_w, anchor_h) = anchors[idx];
let pred_cx = anchor_cx + dx * var0 * anchor_w;
let pred_cy = anchor_cy + dy * var0 * anchor_h;
let pred_w = anchor_w * (dw * var1).exp();
let pred_h = anchor_h * (dh * var1).exp();
let x_min = pred_cx - pred_w / 2.0;
let y_min = pred_cy - pred_h / 2.0;
let x_max = pred_cx + pred_w / 2.0;
let y_max = pred_cy + pred_h / 2.0;
let score = conf_data[[0, idx, 1]];
if score > 0.6 {
boxes.push(Aabb2::from_min_max_vertices(
Point2::new(x_min, y_min),
Point2::new(x_max, y_max),
));
}
}
Ok(boxes)
}
}
impl FaceDetectionModelOutput {
    /// Logs the number of rows in `bbox`, then up to `limit` rows whose score
    /// at index 1 of the matching `confidence` row is greater than 0.1.
    ///
    /// Only batch entry 0 of each tensor is inspected.
    ///
    /// # Panics
    ///
    /// Panics if axis 0 of either tensor is empty, or if a `confidence` row
    /// has fewer than two values.
    pub fn print(&self, limit: usize) {
        // NOTE: this is the number of rows along axis 1, not the number of
        // rows that pass the score filter below.
        tracing::info!("Detected {} faces", self.bbox.shape()[1]);

        // Borrow batch entry 0 as a view; nothing here needs an owned copy.
        let rows = self.bbox.index_axis(ndarray::Axis(0), 0);
        let scores = self.confidence.index_axis(ndarray::Axis(0), 0);

        let candidates = rows
            .axis_iter(ndarray::Axis(0))
            .zip(scores.axis_iter(ndarray::Axis(0)).map(|row| row[1]))
            .filter(|(_, score)| *score > 0.1)
            .take(limit);
        for (bbox, confidence) in candidates {
            tracing::info!("Face BBox: {:?}, Confidence: {:.2}", bbox, confidence);
        }
    }
}
impl FaceDetection {
/// Reads the model file at `path` and builds a detector from its bytes.
///
/// # Errors
///
/// Returns an error if the file cannot be read or if `new_from_bytes`
/// fails.
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
    let bytes = std::fs::read(path)
        .change_context(Error)
        .attach_printable("Failed to read model file")?;
    Self::new_from_bytes(bytes.as_slice())
}
pub fn new_from_bytes(model: &[u8]) -> Result<Self> {
tracing::info!("Loading face detection model from bytes");
let mut model = mnn::Interpreter::from_bytes(model)
.map_err(|e| e.into_inner())
.change_context(Error)
.attach_printable("Failed to load model from bytes")?;
model.set_session_mode(mnn::SessionMode::Release);
let bc = mnn::BackendConfig::default().with_memory_mode(mnn::MemoryMode::High);
let sc = mnn::ScheduleConfig::new()
.with_type(mnn::ForwardType::CPU)
.with_backend_config(bc);
tracing::info!("Creating session handle for face detection model");
let handle = mnn_sync::SessionHandle::new(model, sc)
.change_context(Error)
.attach_printable("Failed to create session handle")?;
Ok(FaceDetection { handle })
}
/// Runs the detection model on `image` and returns its raw output tensors.
///
/// The image is resized to 640x640, converted to `f32`, and has 104, 117 and
/// 123 subtracted from the first three slices along axis 2. It is then
/// rearranged into a standard-layout 4-D array with axis 2 moved to the front
/// and a leading axis of length one added. That array is copied into the
/// model's "input" tensor, the session is run, and the "bbox", "confidence"
/// and "landmark" outputs are copied into owned arrays.
///
/// NOTE(review): this presumably expects `image` as height x width x channel
/// with three channels, in the channel order the subtracted constants were
/// chosen for — confirm against the callers.
///
/// # Errors
///
/// Returns an error if resizing, the tensor conversion, or any of the
/// `?`-propagated MNN calls inside the session closure fails.
pub fn detect_faces(&self, image: ndarray::Array3<u8>) -> Result<FaceDetectionModelOutput> {
// Brings `tap_mut` into scope for the preprocessing pipeline below.
#[rustfmt::skip]
use ::tap::*;
let output = self
.handle
.run(move |sr| {
let mut resized = image
.fast_resize(640, 640, None)
.change_context(mnn::ErrorKind::TensorError)?
.mapv(|f| f as f32)
.tap_mut(|arr| {
// Subtract one constant per slice along axis 2 (presumably
// per-channel mean values — confirm). `zip` stops after three
// slices, so any further slice is left unchanged.
arr.axis_iter_mut(ndarray::Axis(2))
.zip([104, 117, 123])
.for_each(|(mut array, pixel)| {
let pixel = pixel as f32;
array.map_inplace(|v| *v -= pixel);
});
})
// Move axis 2 to the front, add a leading axis of length one, and
// materialise the result in standard layout.
.permuted_axes((2, 0, 1))
.insert_axis(ndarray::Axis(0))
.as_standard_layout()
.into_owned();
let tensor = resized
.as_mnn_tensor_mut()
.attach_printable("Failed to convert ndarray to mnn tensor")
.change_context(mnn::error::ErrorKind::TensorError)?;
tracing::trace!("Image Tensor shape: {:?}", tensor.shape());
let (intptr, session) = sr.both_mut();
tracing::trace!("Copying input tensor to host");
// SAFETY: NOTE(review): the safety contract of `input_unresized` is not
// visible in this file. The tensor obtained here is only used to log its
// shape and as the target of the resize call — confirm against the mnn
// crate documentation that this is sufficient.
unsafe {
let mut input = intptr.input_unresized::<f32>(session, "input")?;
tracing::trace!("Input shape: {:?}", input.shape());
intptr.resize_tensor_by_nchw::<mnn::View<&mut f32>, _>(
input.view_mut(),
1,
3,
640,
640,
);
}
// Presumably applies the new input dimensions to the session before the
// input tensor is fetched again below — confirm.
intptr.resize_session(session);
let mut input = intptr.input::<f32>(session, "input")?;
tracing::trace!("Input shape: {:?}", input.shape());
input.copy_from_host_tensor(tensor.view())?;
tracing::info!("Running face detection session");
intptr.run_session(&session)?;
// Each output is turned into a host tensor and then into an owned ndarray.
// NOTE(review): the `true` argument presumably requests that the tensor
// contents be copied — confirm.
let output_tensor = intptr
.output::<f32>(&session, "bbox")?
.create_host_tensor_from_device(true)
.as_ndarray()
.to_owned();
tracing::trace!("Output Bbox: \t\t{:?}", output_tensor.shape());
let output_confidence = intptr
.output::<f32>(&session, "confidence")?
.create_host_tensor_from_device(true)
.as_ndarray::<ndarray::Ix3>()
.to_owned();
tracing::trace!("Output Confidence: \t{:?}", output_confidence.shape());
let output_landmark = intptr
.output::<f32>(&session, "landmark")?
.create_host_tensor_from_device(true)
.as_ndarray::<ndarray::Ix3>()
.to_owned();
tracing::trace!("Output Landmark: \t{:?}", output_landmark.shape());
Ok(FaceDetectionModelOutput {
bbox: output_tensor,
confidence: output_confidence,
landmark: output_landmark,
})
})
.map_err(|e| e.into_inner())
.change_context(Error)?;
Ok(output)
}
}

1
src/facedet/yolo.rs Normal file
View File

@@ -0,0 +1 @@