diff --git a/src/facedet.rs b/src/facedet.rs
index 387cd58..83db01d 100644
--- a/src/facedet.rs
+++ b/src/facedet.rs
@@ -1,180 +1,2 @@
-use crate::errors::*;
-use bounding_box::Aabb2;
-use error_stack::ResultExt;
-use mnn_bridge::ndarray::*;
-use nalgebra::{Point2, Vector2};
-use ndarray_resize::NdFir;
-use std::path::Path;
-
-pub struct FaceDetectionConfig {
-    min_sizes: Vec<Vec<usize>>,
-    steps: Vec<usize>,
-    variance: Vec<f32>,
-}
-pub struct FaceDetection {
-    handle: mnn_sync::SessionHandle,
-}
-
-pub struct FaceDetectionModelOutput {
-    pub bbox: ndarray::Array3<f32>,
-    pub confidence: ndarray::Array3<f32>,
-    pub landmark: ndarray::Array3<f32>,
-}
-
-impl FaceDetectionModelOutput {
-    pub fn postprocess(self, config: FaceDetectionConfig) -> Result<Vec<Aabb2<f32>>> {
-        // for k, step in enumerate(cfg['steps']):
-        //     feature_size = 640 // step
-        //     for i in range(feature_size):
-        //         for j in range(feature_size):
-        //             for min_size in cfg['min_sizes'][k]:
-        //                 cx = (j + 0.5) * step / 640
-        //                 cy = (i + 0.5) * step / 640
-        //                 s_kx = s_ky = min_size / 640
-        //                 anchors.append([cx, cy, s_kx, s_ky])
-        let mut anchors = Vec::new();
-        config.steps.iter().enumerate().for_each(|(k, step)| {
-            let feature_size = 640 / step;
-            for i in 0..feature_size {
-                for j in 0..feature_size {
-                    for min_size in &config.min_sizes[k] {
-                        let cx = (j as f32 + 0.5) * *step as f32 / 640.0;
-                        let cy = (i as f32 + 0.5) * *step as f32 / 640.0;
-                        let s_kx = *min_size as f32 / 640.0;
-                        let s_ky = *min_size as f32 / 640.0;
-                        anchors.push([cx, cy, s_kx, s_ky]);
-                    }
-                }
-            }
-        });
-
-        Ok(Vec::new())
-    }
-}
-
-impl FaceDetectionModelOutput {
-    pub fn print(&self, limit: usize) {
-        tracing::info!("Detected {} faces", self.bbox.shape()[1]);
-
-        for (bbox, confidence) in self
-            .bbox
-            .clone()
-            .remove_axis(ndarray::Axis(0))
-            .axis_iter(ndarray::Axis(0))
-            .zip(
-                self.confidence
-                    .clone()
-                    .remove_axis(ndarray::Axis(0))
-                    .axis_iter(ndarray::Axis(0))
-                    .map(|c| c[1]),
-            )
-            .filter(|(_, c)| *c > 0.1)
-            .take(limit)
-        {
-            tracing::info!("Face BBox: {:?}, Confidence: {:.2}", bbox, confidence);
-        }
-    }
-}
-
-impl FaceDetection {
-    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
-        let model = std::fs::read(path)
-            .change_context(Error)
-            .attach_printable("Failed to read model file")?;
-        Self::new_from_bytes(&model)
-    }
-
-    pub fn new_from_bytes(model: &[u8]) -> Result<Self> {
-        tracing::info!("Loading face detection model from bytes");
-        let mut model = mnn::Interpreter::from_bytes(model)
-            .map_err(|e| e.into_inner())
-            .change_context(Error)
-            .attach_printable("Failed to load model from bytes")?;
-        model.set_session_mode(mnn::SessionMode::Release);
-        let bc = mnn::BackendConfig::default().with_memory_mode(mnn::MemoryMode::High);
-        let sc = mnn::ScheduleConfig::new()
-            .with_type(mnn::ForwardType::CoreML)
-            .with_backend_config(bc);
-        tracing::info!("Creating session handle for face detection model");
-        let handle = mnn_sync::SessionHandle::new(model, sc)
-            .change_context(Error)
-            .attach_printable("Failed to create session handle")?;
-        Ok(FaceDetection { handle })
-    }
-
-    pub fn detect_faces(&self, image: ndarray::Array3<u8>) -> Result<FaceDetectionModelOutput> {
-        #[rustfmt::skip]
-        use ::tap::*;
-        let output = self
-            .handle
-            .run(move |sr| {
-                let mut resized = image
-                    .fast_resize(640, 640, None)
-                    .change_context(mnn::ErrorKind::TensorError)?
-                    .mapv(|f| f as f32)
-                    .tap_mut(|arr| {
-                        arr.axis_iter_mut(ndarray::Axis(2))
-                            .zip([104, 117, 123])
-                            .for_each(|(mut array, pixel)| {
-                                let pixel = pixel as f32;
-                                array.map_inplace(|v| *v -= pixel);
-                            });
-                    })
-                    .permuted_axes((2, 0, 1))
-                    .insert_axis(ndarray::Axis(0))
-                    .as_standard_layout()
-                    .into_owned();
-                let tensor = resized
-                    .as_mnn_tensor_mut()
-                    .attach_printable("Failed to convert ndarray to mnn tensor")
-                    .change_context(mnn::error::ErrorKind::TensorError)?;
-                tracing::trace!("Image Tensor shape: {:?}", tensor.shape());
-                let (intptr, session) = sr.both_mut();
-                tracing::trace!("Copying input tensor to host");
-                unsafe {
-                    let mut input = intptr.input_unresized::<f32>(session, "input")?;
-                    tracing::trace!("Input shape: {:?}", input.shape());
-                    intptr.resize_tensor_by_nchw::, _>(
-                        input.view_mut(),
-                        1,
-                        3,
-                        640,
-                        640,
-                    );
-                }
-                intptr.resize_session(session);
-                let mut input = intptr.input::<f32>(session, "input")?;
-                tracing::trace!("Input shape: {:?}", input.shape());
-                input.copy_from_host_tensor(tensor.view())?;
-
-                tracing::info!("Running face detection session");
-                intptr.run_session(&session)?;
-                let output_tensor = intptr
-                    .output::<f32>(&session, "bbox")?
-                    .create_host_tensor_from_device(true)
-                    .as_ndarray()
-                    .to_owned();
-                tracing::trace!("Output Bbox: \t\t{:?}", output_tensor.shape());
-                let output_confidence = intptr
-                    .output::<f32>(&session, "confidence")?
-                    .create_host_tensor_from_device(true)
-                    .as_ndarray::<f32>()
-                    .to_owned();
-                tracing::trace!("Output Confidence: \t{:?}", output_confidence.shape());
-                let output_landmark = intptr
-                    .output::<f32>(&session, "landmark")?
-                    .create_host_tensor_from_device(true)
-                    .as_ndarray::<f32>()
-                    .to_owned();
-                tracing::trace!("Output Landmark: \t{:?}", output_landmark.shape());
-                Ok(FaceDetectionModelOutput {
-                    bbox: output_tensor,
-                    confidence: output_confidence,
-                    landmark: output_landmark,
-                })
-            })
-            .map_err(|e| e.into_inner())
-            .change_context(Error)?;
-        Ok(output)
-    }
-}
+pub mod retinaface;
+pub mod yolo;
diff --git a/src/facedet/retinaface.rs b/src/facedet/retinaface.rs
new file mode 100644
index 0000000..40c12e9
--- /dev/null
+++ b/src/facedet/retinaface.rs
@@ -0,0 +1,213 @@
+use crate::errors::*;
+use bounding_box::Aabb2;
+use error_stack::ResultExt;
+use mnn_bridge::ndarray::*;
+use nalgebra::{Point2, Vector2};
+use ndarray_resize::NdFir;
+use std::path::Path;
+
+pub struct FaceDetectionConfig {
+    min_sizes: Vec<Vector2<usize>>,
+    steps: Vec<usize>,
+    variance: Vec<f32>,
+}
+
+impl Default for FaceDetectionConfig {
+    fn default() -> Self {
+        FaceDetectionConfig {
+            min_sizes: vec![
+                Vector2::new(16, 32),
+                Vector2::new(64, 128),
+                Vector2::new(256, 512),
+            ],
+            steps: vec![8, 16, 32],
+            variance: vec![0.1, 0.2],
+        }
+    }
+}
+pub struct FaceDetection {
+    handle: mnn_sync::SessionHandle,
+}
+
+pub struct FaceDetectionModelOutput {
+    pub bbox: ndarray::Array3<f32>,
+    pub confidence: ndarray::Array3<f32>,
+    pub landmark: ndarray::Array3<f32>,
+}
+
+impl FaceDetectionModelOutput {
+    pub fn postprocess(self, config: FaceDetectionConfig) -> Result<Vec<Aabb2<f32>>> {
+        let mut anchors = Vec::new();
+        for (k, &step) in config.steps.iter().enumerate() {
+            let feature_size = 640 / step;
+            let min_sizes = config.min_sizes[k];
+            let sizes = [min_sizes.x, min_sizes.y];
+            for i in 0..feature_size {
+                for j in 0..feature_size {
+                    for &size in &sizes {
+                        let cx = (j as f32 + 0.5) * step as f32 / 640.0;
+                        let cy = (i as f32 + 0.5) * step as f32 / 640.0;
+                        let s_k = size as f32 / 640.0;
+                        anchors.push((cx, cy, s_k, s_k));
+                    }
+                }
+            }
+        }
+        let mut boxes = Vec::new();
+        let var0 = config.variance[0];
+        let var1 = config.variance[1];
+        let bbox_data = self.bbox;
+        let conf_data = self.confidence;
+        let num_priors = bbox_data.shape()[1];
+        for idx in 0..num_priors {
+            let dx = bbox_data[[0, idx, 0]];
+            let dy = bbox_data[[0, idx, 1]];
+            let dw = bbox_data[[0, idx, 2]];
+            let dh = bbox_data[[0, idx, 3]];
+            let (anchor_cx, anchor_cy, anchor_w, anchor_h) = anchors[idx];
+            let pred_cx = anchor_cx + dx * var0 * anchor_w;
+            let pred_cy = anchor_cy + dy * var0 * anchor_h;
+            let pred_w = anchor_w * (dw * var1).exp();
+            let pred_h = anchor_h * (dh * var1).exp();
+            let x_min = pred_cx - pred_w / 2.0;
+            let y_min = pred_cy - pred_h / 2.0;
+            let x_max = pred_cx + pred_w / 2.0;
+            let y_max = pred_cy + pred_h / 2.0;
+            let score = conf_data[[0, idx, 1]];
+            if score > 0.6 {
+                boxes.push(Aabb2::from_min_max_vertices(
+                    Point2::new(x_min, y_min),
+                    Point2::new(x_max, y_max),
+                ));
+            }
+        }
+        Ok(boxes)
+    }
+}
+
+impl FaceDetectionModelOutput {
+    pub fn print(&self, limit: usize) {
+        tracing::info!("Detected {} faces", self.bbox.shape()[1]);
+
+        for (bbox, confidence) in self
+            .bbox
+            .clone()
+            .remove_axis(ndarray::Axis(0))
+            .axis_iter(ndarray::Axis(0))
+            .zip(
+                self.confidence
+                    .clone()
+                    .remove_axis(ndarray::Axis(0))
+                    .axis_iter(ndarray::Axis(0))
+                    .map(|c| c[1]),
+            )
+            .filter(|(_, c)| *c > 0.1)
+            .take(limit)
+        {
+            tracing::info!("Face BBox: {:?}, Confidence: {:.2}", bbox, confidence);
+        }
+    }
+}
+
+impl FaceDetection {
+    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
+        let model = std::fs::read(path)
+            .change_context(Error)
+            .attach_printable("Failed to read model file")?;
+        Self::new_from_bytes(&model)
+    }
+
+    pub fn new_from_bytes(model: &[u8]) -> Result<Self> {
+        tracing::info!("Loading face detection model from bytes");
+        let mut model = mnn::Interpreter::from_bytes(model)
+            .map_err(|e| e.into_inner())
+            .change_context(Error)
+            .attach_printable("Failed to load model from bytes")?;
+        model.set_session_mode(mnn::SessionMode::Release);
+        let bc = mnn::BackendConfig::default().with_memory_mode(mnn::MemoryMode::High);
+        let sc = mnn::ScheduleConfig::new()
+            .with_type(mnn::ForwardType::CPU)
+            .with_backend_config(bc);
+        tracing::info!("Creating session handle for face detection model");
+        let handle = mnn_sync::SessionHandle::new(model, sc)
+            .change_context(Error)
+            .attach_printable("Failed to create session handle")?;
+        Ok(FaceDetection { handle })
+    }
+
+    pub fn detect_faces(&self, image: ndarray::Array3<u8>) -> Result<FaceDetectionModelOutput> {
+        #[rustfmt::skip]
+        use ::tap::*;
+        let output = self
+            .handle
+            .run(move |sr| {
+                let mut resized = image
+                    .fast_resize(640, 640, None)
+                    .change_context(mnn::ErrorKind::TensorError)?
+                    .mapv(|f| f as f32)
+                    .tap_mut(|arr| {
+                        arr.axis_iter_mut(ndarray::Axis(2))
+                            .zip([104, 117, 123])
+                            .for_each(|(mut array, pixel)| {
+                                let pixel = pixel as f32;
+                                array.map_inplace(|v| *v -= pixel);
+                            });
+                    })
+                    .permuted_axes((2, 0, 1))
+                    .insert_axis(ndarray::Axis(0))
+                    .as_standard_layout()
+                    .into_owned();
+                let tensor = resized
+                    .as_mnn_tensor_mut()
+                    .attach_printable("Failed to convert ndarray to mnn tensor")
+                    .change_context(mnn::error::ErrorKind::TensorError)?;
+                tracing::trace!("Image Tensor shape: {:?}", tensor.shape());
+                let (intptr, session) = sr.both_mut();
+                tracing::trace!("Copying input tensor to host");
+                unsafe {
+                    let mut input = intptr.input_unresized::<f32>(session, "input")?;
+                    tracing::trace!("Input shape: {:?}", input.shape());
+                    intptr.resize_tensor_by_nchw::, _>(
+                        input.view_mut(),
+                        1,
+                        3,
+                        640,
+                        640,
+                    );
+                }
+                intptr.resize_session(session);
+                let mut input = intptr.input::<f32>(session, "input")?;
+                tracing::trace!("Input shape: {:?}", input.shape());
+                input.copy_from_host_tensor(tensor.view())?;
+
+                tracing::info!("Running face detection session");
+                intptr.run_session(&session)?;
+                let output_tensor = intptr
+                    .output::<f32>(&session, "bbox")?
+                    .create_host_tensor_from_device(true)
+                    .as_ndarray()
+                    .to_owned();
+                tracing::trace!("Output Bbox: \t\t{:?}", output_tensor.shape());
+                let output_confidence = intptr
+                    .output::<f32>(&session, "confidence")?
+                    .create_host_tensor_from_device(true)
+                    .as_ndarray::<f32>()
+                    .to_owned();
+                tracing::trace!("Output Confidence: \t{:?}", output_confidence.shape());
+                let output_landmark = intptr
+                    .output::<f32>(&session, "landmark")?
+                    .create_host_tensor_from_device(true)
+                    .as_ndarray::<f32>()
+                    .to_owned();
+                tracing::trace!("Output Landmark: \t{:?}", output_landmark.shape());
+                Ok(FaceDetectionModelOutput {
+                    bbox: output_tensor,
+                    confidence: output_confidence,
+                    landmark: output_landmark,
+                })
+            })
+            .map_err(|e| e.into_inner())
+            .change_context(Error)?;
+        Ok(output)
+    }
+}
diff --git a/src/facedet/yolo.rs b/src/facedet/yolo.rs
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/facedet/yolo.rs
@@ -0,0 +1 @@
+
diff --git a/src/main.rs b/src/main.rs
index 93a379d..1f0869f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -14,7 +14,7 @@ pub fn main() -> Result<()> {
     match args.cmd {
         cli::SubCommand::Detect(detect) => {
             use detector::facedet;
-            let model = facedet::FaceDetection::new_from_bytes(RETINAFACE_MODEL)
+            let model = facedet::retinaface::FaceDetection::new_from_bytes(RETINAFACE_MODEL)
                 .change_context(errors::Error)
                 .attach_printable("Failed to create face detection model")?;
             let image = image::open(detect.image).change_context(Error)?;
@@ -27,7 +27,9 @@ pub fn main() -> Result<()> {
                 .detect_faces(array)
                 .change_context(errors::Error)
                 .attach_printable("Failed to detect faces")?;
-            output.print(20);
+            // output.print(20);
+            let aabbs = output.postprocess(Default::default());
+            dbg!(aabbs);
         }
         cli::SubCommand::List(list) => {
             println!("List: {:?}", list);