feat: Added bounding-box crate

2025-07-15 18:53:16 +05:30
parent 69c768ab0d
commit dc61c6045a
17 changed files with 2848 additions and 27 deletions
--- a/src/facedet.rs
+++ b/src/facedet.rs
@@ -1,12 +1,49 @@
 use crate::errors::*;
 use error_stack::ResultExt;
-use mnn_bridge::ndarray::NdarrayToMnn;
+use mnn_bridge::ndarray::*;
+use nalgebra::Point2;
+use ndarray_resize::NdFir;
 use std::path::Path;

+pub struct FaceDetectionConfig { // NOTE(review): fields are private and nothing visible in this diff constructs or reads this type
+    min_sizes: Vec<Point2<u32>>, // presumably per-level anchor box sizes — TODO confirm
+    steps: Vec<u32>, // presumably feature-map strides — TODO confirm
+    variance: Vec<f32>, // presumably box-decoding variances — TODO confirm
+}
 pub struct FaceDetection {
    handle: mnn_sync::SessionHandle,
 }

+pub struct FaceDetectionModelOutput { // owned host copies of the three session outputs read in `detect_faces`
+    pub bbox: ndarray::Array3<f32>, // copied from the session output named "bbox"
+    pub confidence: ndarray::Array3<f32>, // copied from "confidence"; `print` reads index 1 of each row as the score
+    pub landmark: ndarray::Array3<f32>, // copied from the session output named "landmark"
+}
+
+impl FaceDetectionModelOutput {
+    /// Logs the row count of `bbox`, then up to `limit` rows scoring above 0.1.
+    ///
+    /// Only batch entry 0 is read; the score is column 1 of each `confidence` row.
+    /// The logged count covers every row, not just those above the threshold.
+    pub fn print(&self, limit: usize) {
+        tracing::info!("Detected {} faces", self.bbox.shape()[1]);
+
+        // `index_axis(Axis(0), 0)` borrows the same sub-array that
+        // `clone().remove_axis(Axis(0))` produced, without copying the data.
+        let boxes = self.bbox.index_axis(ndarray::Axis(0), 0);
+        let scores = self.confidence.index_axis(ndarray::Axis(0), 0);
+
+        let rows = boxes
+            .axis_iter(ndarray::Axis(0))
+            .zip(scores.axis_iter(ndarray::Axis(0)).map(|c| c[1]))
+            .filter(|(_, c)| *c > 0.1)
+            .take(limit);
+        for (bbox, confidence) in rows {
+            tracing::info!("Face BBox: {:?}, Confidence: {:.2}", bbox, confidence);
+        }
+    }
+}
+
 impl FaceDetection {
    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
        let model = std::fs::read(path)
@@ -33,35 +70,78 @@ impl FaceDetection {
        Ok(FaceDetection { handle })
    }

-    pub fn detect_faces(&self, image: ndarray::Array3<u8>) -> Result<ndarray::Array2<u8>> {
-        use mnn_bridge::ndarray::MnnToNdarray;
+    pub fn detect_faces(&self, image: ndarray::Array3<u8>) -> Result<FaceDetectionModelOutput> { // preprocess `image`, run the session, return its "bbox"/"confidence"/"landmark" outputs
+        use ::tap::*; // brings `tap_mut` into scope for the preprocessing chain below
        let output = self
            .handle
            .run(move |sr| {
-                let tensor = image
-                    .as_mnn_tensor()
-                    .ok_or_else(|| Error)
+                let mut resized = image
+                    .fast_resize(640, 640, None) // NOTE(review): fixed 640x640 target; whether aspect ratio is kept depends on `fast_resize` — confirm
+                    .change_context(mnn::ErrorKind::TensorError)?
+                    .mapv(|f| f as f32) // cast every element to f32
+                    .tap_mut(|arr| { // subtract one constant per slice along axis 2, in place
+                        arr.axis_iter_mut(ndarray::Axis(2))
+                            .zip([104, 117, 123]) // NOTE(review): these look like BGR-ordered channel means, but main.rs passes an rgb8 image — confirm channel order
+                            .for_each(|(mut array, pixel)| {
+                                let pixel = pixel as f32;
+                                array.map_inplace(|v| *v -= pixel);
+                            });
+                    })
+                    .permuted_axes((2, 0, 1)) // move axis 2 to the front (presumably HWC -> CHW — confirm)
+                    .insert_axis(ndarray::Axis(0)) // prepend a length-1 axis
+                    .as_standard_layout() // re-lay out contiguously after the axis permutation
+                    .into_owned();
+                let tensor = resized
+                    .as_mnn_tensor_mut()
                    .attach_printable("Failed to convert ndarray to mnn tensor")
                    .change_context(mnn::error::ErrorKind::TensorError)?;
+                tracing::trace!("Image Tensor shape: {:?}", tensor.shape());
                let (intptr, session) = sr.both_mut();
                tracing::trace!("Copying input tensor to host");
-                // let input = intptr.input::<u8>(session, "input")?;
-                // dbg!(input.shape());
-                // let mut t = input.create_host_tensor_from_device(false);
-                // tensor.copy_to_host_tensor(&mut t)?;
-                //
-                // intptr.run_session(&session)?;
-                // let output = intptr.output::<u8>(&session, "output").unwrap();
-                // let output_tensor = output.create_host_tensor_from_device(true);
-                // let output_array = output_tensor
-                //     .try_as_ndarray()
-                //     .change_context(mnn::error::ErrorKind::TensorError)?
-                //     .to_owned();
-                // Ok(output_array)
-                Ok(ndarray::Array2::<u8>::zeros((1, 1))) // Placeholder for actual output
+                unsafe { // NOTE(review): no SAFETY justification is given; document why `input_unresized` + `resize_tensor_by_nchw` is sound here
+                    let mut input = intptr.input_unresized::<f32>(session, "input")?;
+                    tracing::trace!("Input shape: {:?}", input.shape());
+                    intptr.resize_tensor_by_nchw::<mnn::View<&mut f32>, _>( // request N=1, C=3, H=640, W=640 for the "input" tensor
+                        input.view_mut(),
+                        1,
+                        3,
+                        640,
+                        640,
+                    );
+                }
+                intptr.resize_session(session); // presumably applies the new input shape to the session — confirm
+                let mut input = intptr.input::<f32>(session, "input")?; // fetch "input" again after the resize
+                tracing::trace!("Input shape: {:?}", input.shape());
+                input.copy_from_host_tensor(tensor.view())?; // upload the preprocessed image into the session input
+
+                tracing::info!("Running face detection session");
+                intptr.run_session(&session)?;
+                let output_tensor = intptr
+                    .output::<f32>(&session, "bbox")?
+                    .create_host_tensor_from_device(true)
+                    .as_ndarray() // dimensionality is inferred from the `bbox: Array3<f32>` field it is stored in below
+                    .to_owned();
+                tracing::trace!("Output Bbox: \t\t{:?}", output_tensor.shape());
+                let output_confidence = intptr
+                    .output::<f32>(&session, "confidence")?
+                    .create_host_tensor_from_device(true)
+                    .as_ndarray::<ndarray::Ix3>()
+                    .to_owned();
+                tracing::trace!("Output Confidence: \t{:?}", output_confidence.shape());
+                let output_landmark = intptr
+                    .output::<f32>(&session, "landmark")?
+                    .create_host_tensor_from_device(true)
+                    .as_ndarray::<ndarray::Ix3>()
+                    .to_owned();
+                tracing::trace!("Output Landmark: \t{:?}", output_landmark.shape());
+                Ok(FaceDetectionModelOutput { // all three arrays are owned copies made via `to_owned()` above
+                    bbox: output_tensor,
+                    confidence: output_confidence,
+                    landmark: output_landmark,
+                })
            })
            .map_err(|e| e.into_inner())
-            .change_context(Error);
-        output
+            .change_context(Error)?; // re-contextualise the session error as this crate's `Error` and propagate it
+        Ok(output)
    }
 }
--- a/src/main.rs
+++ b/src/main.rs
@@ -19,12 +19,15 @@ pub fn main() -> Result<()> {
                .attach_printable("Failed to create face detection model")?;
            let image = image::open(detect.image).change_context(Error)?;
            let image = image.into_rgb8();
-            let array = image.into_ndarray()
+            let array = image
+                .into_ndarray()
                .change_context(errors::Error)
                .attach_printable("Failed to convert image to ndarray")?;
-            model.detect_faces(array)
+            let output = model
+                .detect_faces(array)
                .change_context(errors::Error)
                .attach_printable("Failed to detect faces")?;
+            output.print(20);
        }
        cli::SubCommand::List(list) => {
            println!("List: {:?}", list);