feat: output aabbs from retinaface
This commit is contained in:
182
src/facedet.rs
182
src/facedet.rs
@@ -1,180 +1,2 @@
|
|||||||
use crate::errors::*;
|
pub mod retinaface;
|
||||||
use bounding_box::Aabb2;
|
pub mod yolo;
|
||||||
use error_stack::ResultExt;
|
|
||||||
use mnn_bridge::ndarray::*;
|
|
||||||
use nalgebra::{Point2, Vector2};
|
|
||||||
use ndarray_resize::NdFir;
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
pub struct FaceDetectionConfig {
|
|
||||||
min_sizes: Vec<Vector2<usize>>,
|
|
||||||
steps: Vec<usize>,
|
|
||||||
variance: Vec<f32>,
|
|
||||||
}
|
|
||||||
/// Face detector that owns the MNN session used for inference.
pub struct FaceDetection {
|
|
||||||
/// Session handle created by `new_from_bytes`; `detect_faces` runs inference through it.
handle: mnn_sync::SessionHandle,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FaceDetectionModelOutput {
|
|
||||||
pub bbox: ndarray::Array3<f32>,
|
|
||||||
pub confidence: ndarray::Array3<f32>,
|
|
||||||
pub landmark: ndarray::Array3<f32>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FaceDetectionModelOutput {
|
|
||||||
pub fn postprocess(self, config: FaceDetectionConfig) -> Result<Vec<Aabb2<f32>>> {
|
|
||||||
// for k, step in enumerate(cfg['steps']):
|
|
||||||
// feature_size = 640 // step
|
|
||||||
// for i in range(feature_size):
|
|
||||||
// for j in range(feature_size):
|
|
||||||
// for min_size in cfg['min_sizes'][k]:
|
|
||||||
// cx = (j + 0.5) * step / 640
|
|
||||||
// cy = (i + 0.5) * step / 640
|
|
||||||
// s_kx = s_ky = min_size / 640
|
|
||||||
// anchors.append([cx, cy, s_kx, s_ky])
|
|
||||||
let mut anchors = Vec::new();
|
|
||||||
config.steps.iter().enumerate().for_each(|(k, step)| {
|
|
||||||
let feature_size = 640 / step;
|
|
||||||
for i in 0..feature_size {
|
|
||||||
for j in 0..feature_size {
|
|
||||||
for min_size in &config.min_sizes[k] {
|
|
||||||
let cx = (j as f32 + 0.5) * *step as f32 / 640.0;
|
|
||||||
let cy = (i as f32 + 0.5) * *step as f32 / 640.0;
|
|
||||||
let s_kx = *min_size as f32 / 640.0;
|
|
||||||
let s_ky = *min_size as f32 / 640.0;
|
|
||||||
anchors.push([cx, cy, s_kx, s_ky]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(Vec::new())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FaceDetectionModelOutput {
|
|
||||||
pub fn print(&self, limit: usize) {
|
|
||||||
tracing::info!("Detected {} faces", self.bbox.shape()[1]);
|
|
||||||
|
|
||||||
for (bbox, confidence) in self
|
|
||||||
.bbox
|
|
||||||
.clone()
|
|
||||||
.remove_axis(ndarray::Axis(0))
|
|
||||||
.axis_iter(ndarray::Axis(0))
|
|
||||||
.zip(
|
|
||||||
self.confidence
|
|
||||||
.clone()
|
|
||||||
.remove_axis(ndarray::Axis(0))
|
|
||||||
.axis_iter(ndarray::Axis(0))
|
|
||||||
.map(|c| c[1]),
|
|
||||||
)
|
|
||||||
.filter(|(_, c)| *c > 0.1)
|
|
||||||
.take(limit)
|
|
||||||
{
|
|
||||||
tracing::info!("Face BBox: {:?}, Confidence: {:.2}", bbox, confidence);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FaceDetection {
|
|
||||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
|
||||||
let model = std::fs::read(path)
|
|
||||||
.change_context(Error)
|
|
||||||
.attach_printable("Failed to read model file")?;
|
|
||||||
Self::new_from_bytes(&model)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn new_from_bytes(model: &[u8]) -> Result<Self> {
|
|
||||||
tracing::info!("Loading face detection model from bytes");
|
|
||||||
let mut model = mnn::Interpreter::from_bytes(model)
|
|
||||||
.map_err(|e| e.into_inner())
|
|
||||||
.change_context(Error)
|
|
||||||
.attach_printable("Failed to load model from bytes")?;
|
|
||||||
model.set_session_mode(mnn::SessionMode::Release);
|
|
||||||
let bc = mnn::BackendConfig::default().with_memory_mode(mnn::MemoryMode::High);
|
|
||||||
let sc = mnn::ScheduleConfig::new()
|
|
||||||
.with_type(mnn::ForwardType::CoreML)
|
|
||||||
.with_backend_config(bc);
|
|
||||||
tracing::info!("Creating session handle for face detection model");
|
|
||||||
let handle = mnn_sync::SessionHandle::new(model, sc)
|
|
||||||
.change_context(Error)
|
|
||||||
.attach_printable("Failed to create session handle")?;
|
|
||||||
Ok(FaceDetection { handle })
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn detect_faces(&self, image: ndarray::Array3<u8>) -> Result<FaceDetectionModelOutput> {
|
|
||||||
#[rustfmt::skip]
|
|
||||||
use ::tap::*;
|
|
||||||
let output = self
|
|
||||||
.handle
|
|
||||||
.run(move |sr| {
|
|
||||||
let mut resized = image
|
|
||||||
.fast_resize(640, 640, None)
|
|
||||||
.change_context(mnn::ErrorKind::TensorError)?
|
|
||||||
.mapv(|f| f as f32)
|
|
||||||
.tap_mut(|arr| {
|
|
||||||
arr.axis_iter_mut(ndarray::Axis(2))
|
|
||||||
.zip([104, 117, 123])
|
|
||||||
.for_each(|(mut array, pixel)| {
|
|
||||||
let pixel = pixel as f32;
|
|
||||||
array.map_inplace(|v| *v -= pixel);
|
|
||||||
});
|
|
||||||
})
|
|
||||||
.permuted_axes((2, 0, 1))
|
|
||||||
.insert_axis(ndarray::Axis(0))
|
|
||||||
.as_standard_layout()
|
|
||||||
.into_owned();
|
|
||||||
let tensor = resized
|
|
||||||
.as_mnn_tensor_mut()
|
|
||||||
.attach_printable("Failed to convert ndarray to mnn tensor")
|
|
||||||
.change_context(mnn::error::ErrorKind::TensorError)?;
|
|
||||||
tracing::trace!("Image Tensor shape: {:?}", tensor.shape());
|
|
||||||
let (intptr, session) = sr.both_mut();
|
|
||||||
tracing::trace!("Copying input tensor to host");
|
|
||||||
unsafe {
|
|
||||||
let mut input = intptr.input_unresized::<f32>(session, "input")?;
|
|
||||||
tracing::trace!("Input shape: {:?}", input.shape());
|
|
||||||
intptr.resize_tensor_by_nchw::<mnn::View<&mut f32>, _>(
|
|
||||||
input.view_mut(),
|
|
||||||
1,
|
|
||||||
3,
|
|
||||||
640,
|
|
||||||
640,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
intptr.resize_session(session);
|
|
||||||
let mut input = intptr.input::<f32>(session, "input")?;
|
|
||||||
tracing::trace!("Input shape: {:?}", input.shape());
|
|
||||||
input.copy_from_host_tensor(tensor.view())?;
|
|
||||||
|
|
||||||
tracing::info!("Running face detection session");
|
|
||||||
intptr.run_session(&session)?;
|
|
||||||
let output_tensor = intptr
|
|
||||||
.output::<f32>(&session, "bbox")?
|
|
||||||
.create_host_tensor_from_device(true)
|
|
||||||
.as_ndarray()
|
|
||||||
.to_owned();
|
|
||||||
tracing::trace!("Output Bbox: \t\t{:?}", output_tensor.shape());
|
|
||||||
let output_confidence = intptr
|
|
||||||
.output::<f32>(&session, "confidence")?
|
|
||||||
.create_host_tensor_from_device(true)
|
|
||||||
.as_ndarray::<ndarray::Ix3>()
|
|
||||||
.to_owned();
|
|
||||||
tracing::trace!("Output Confidence: \t{:?}", output_confidence.shape());
|
|
||||||
let output_landmark = intptr
|
|
||||||
.output::<f32>(&session, "landmark")?
|
|
||||||
.create_host_tensor_from_device(true)
|
|
||||||
.as_ndarray::<ndarray::Ix3>()
|
|
||||||
.to_owned();
|
|
||||||
tracing::trace!("Output Landmark: \t{:?}", output_landmark.shape());
|
|
||||||
Ok(FaceDetectionModelOutput {
|
|
||||||
bbox: output_tensor,
|
|
||||||
confidence: output_confidence,
|
|
||||||
landmark: output_landmark,
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.map_err(|e| e.into_inner())
|
|
||||||
.change_context(Error)?;
|
|
||||||
Ok(output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
213
src/facedet/retinaface.rs
Normal file
213
src/facedet/retinaface.rs
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
use crate::errors::*;
|
||||||
|
use bounding_box::Aabb2;
|
||||||
|
use error_stack::ResultExt;
|
||||||
|
use mnn_bridge::ndarray::*;
|
||||||
|
use nalgebra::{Point2, Vector2};
|
||||||
|
use ndarray_resize::NdFir;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
pub struct FaceDetectionConfig {
|
||||||
|
min_sizes: Vec<Vector2<usize>>,
|
||||||
|
steps: Vec<usize>,
|
||||||
|
variance: Vec<f32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for FaceDetectionConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
FaceDetectionConfig {
|
||||||
|
min_sizes: vec![
|
||||||
|
Vector2::new(16, 32),
|
||||||
|
Vector2::new(64, 128),
|
||||||
|
Vector2::new(256, 512),
|
||||||
|
],
|
||||||
|
steps: vec![8, 16, 32],
|
||||||
|
variance: vec![0.1, 0.2],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Face detector that owns the MNN session used for inference.
pub struct FaceDetection {
|
||||||
|
/// Session handle created by `new_from_bytes`; `detect_faces` runs inference through it.
handle: mnn_sync::SessionHandle,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FaceDetectionModelOutput {
|
||||||
|
pub bbox: ndarray::Array3<f32>,
|
||||||
|
pub confidence: ndarray::Array3<f32>,
|
||||||
|
pub landmark: ndarray::Array3<f32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FaceDetectionModelOutput {
|
||||||
|
pub fn postprocess(self, config: FaceDetectionConfig) -> Result<Vec<Aabb2<f32>>> {
|
||||||
|
let mut anchors = Vec::new();
|
||||||
|
for (k, &step) in config.steps.iter().enumerate() {
|
||||||
|
let feature_size = 640 / step;
|
||||||
|
let min_sizes = config.min_sizes[k];
|
||||||
|
let sizes = [min_sizes.x, min_sizes.y];
|
||||||
|
for i in 0..feature_size {
|
||||||
|
for j in 0..feature_size {
|
||||||
|
for &size in &sizes {
|
||||||
|
let cx = (j as f32 + 0.5) * step as f32 / 640.0;
|
||||||
|
let cy = (i as f32 + 0.5) * step as f32 / 640.0;
|
||||||
|
let s_k = size as f32 / 640.0;
|
||||||
|
anchors.push((cx, cy, s_k, s_k));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut boxes = Vec::new();
|
||||||
|
let var0 = config.variance[0];
|
||||||
|
let var1 = config.variance[1];
|
||||||
|
let bbox_data = self.bbox;
|
||||||
|
let conf_data = self.confidence;
|
||||||
|
let num_priors = bbox_data.shape()[1];
|
||||||
|
for idx in 0..num_priors {
|
||||||
|
let dx = bbox_data[[0, idx, 0]];
|
||||||
|
let dy = bbox_data[[0, idx, 1]];
|
||||||
|
let dw = bbox_data[[0, idx, 2]];
|
||||||
|
let dh = bbox_data[[0, idx, 3]];
|
||||||
|
let (anchor_cx, anchor_cy, anchor_w, anchor_h) = anchors[idx];
|
||||||
|
let pred_cx = anchor_cx + dx * var0 * anchor_w;
|
||||||
|
let pred_cy = anchor_cy + dy * var0 * anchor_h;
|
||||||
|
let pred_w = anchor_w * (dw * var1).exp();
|
||||||
|
let pred_h = anchor_h * (dh * var1).exp();
|
||||||
|
let x_min = pred_cx - pred_w / 2.0;
|
||||||
|
let y_min = pred_cy - pred_h / 2.0;
|
||||||
|
let x_max = pred_cx + pred_w / 2.0;
|
||||||
|
let y_max = pred_cy + pred_h / 2.0;
|
||||||
|
let score = conf_data[[0, idx, 1]];
|
||||||
|
if score > 0.6 {
|
||||||
|
boxes.push(Aabb2::from_min_max_vertices(
|
||||||
|
Point2::new(x_min, y_min),
|
||||||
|
Point2::new(x_max, y_max),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(boxes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FaceDetectionModelOutput {
|
||||||
|
pub fn print(&self, limit: usize) {
|
||||||
|
tracing::info!("Detected {} faces", self.bbox.shape()[1]);
|
||||||
|
|
||||||
|
for (bbox, confidence) in self
|
||||||
|
.bbox
|
||||||
|
.clone()
|
||||||
|
.remove_axis(ndarray::Axis(0))
|
||||||
|
.axis_iter(ndarray::Axis(0))
|
||||||
|
.zip(
|
||||||
|
self.confidence
|
||||||
|
.clone()
|
||||||
|
.remove_axis(ndarray::Axis(0))
|
||||||
|
.axis_iter(ndarray::Axis(0))
|
||||||
|
.map(|c| c[1]),
|
||||||
|
)
|
||||||
|
.filter(|(_, c)| *c > 0.1)
|
||||||
|
.take(limit)
|
||||||
|
{
|
||||||
|
tracing::info!("Face BBox: {:?}, Confidence: {:.2}", bbox, confidence);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FaceDetection {
|
||||||
|
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||||
|
let model = std::fs::read(path)
|
||||||
|
.change_context(Error)
|
||||||
|
.attach_printable("Failed to read model file")?;
|
||||||
|
Self::new_from_bytes(&model)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_from_bytes(model: &[u8]) -> Result<Self> {
|
||||||
|
tracing::info!("Loading face detection model from bytes");
|
||||||
|
let mut model = mnn::Interpreter::from_bytes(model)
|
||||||
|
.map_err(|e| e.into_inner())
|
||||||
|
.change_context(Error)
|
||||||
|
.attach_printable("Failed to load model from bytes")?;
|
||||||
|
model.set_session_mode(mnn::SessionMode::Release);
|
||||||
|
let bc = mnn::BackendConfig::default().with_memory_mode(mnn::MemoryMode::High);
|
||||||
|
let sc = mnn::ScheduleConfig::new()
|
||||||
|
.with_type(mnn::ForwardType::CPU)
|
||||||
|
.with_backend_config(bc);
|
||||||
|
tracing::info!("Creating session handle for face detection model");
|
||||||
|
let handle = mnn_sync::SessionHandle::new(model, sc)
|
||||||
|
.change_context(Error)
|
||||||
|
.attach_printable("Failed to create session handle")?;
|
||||||
|
Ok(FaceDetection { handle })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn detect_faces(&self, image: ndarray::Array3<u8>) -> Result<FaceDetectionModelOutput> {
|
||||||
|
#[rustfmt::skip]
|
||||||
|
use ::tap::*;
|
||||||
|
let output = self
|
||||||
|
.handle
|
||||||
|
.run(move |sr| {
|
||||||
|
let mut resized = image
|
||||||
|
.fast_resize(640, 640, None)
|
||||||
|
.change_context(mnn::ErrorKind::TensorError)?
|
||||||
|
.mapv(|f| f as f32)
|
||||||
|
.tap_mut(|arr| {
|
||||||
|
arr.axis_iter_mut(ndarray::Axis(2))
|
||||||
|
.zip([104, 117, 123])
|
||||||
|
.for_each(|(mut array, pixel)| {
|
||||||
|
let pixel = pixel as f32;
|
||||||
|
array.map_inplace(|v| *v -= pixel);
|
||||||
|
});
|
||||||
|
})
|
||||||
|
.permuted_axes((2, 0, 1))
|
||||||
|
.insert_axis(ndarray::Axis(0))
|
||||||
|
.as_standard_layout()
|
||||||
|
.into_owned();
|
||||||
|
let tensor = resized
|
||||||
|
.as_mnn_tensor_mut()
|
||||||
|
.attach_printable("Failed to convert ndarray to mnn tensor")
|
||||||
|
.change_context(mnn::error::ErrorKind::TensorError)?;
|
||||||
|
tracing::trace!("Image Tensor shape: {:?}", tensor.shape());
|
||||||
|
let (intptr, session) = sr.both_mut();
|
||||||
|
tracing::trace!("Copying input tensor to host");
|
||||||
|
unsafe {
|
||||||
|
let mut input = intptr.input_unresized::<f32>(session, "input")?;
|
||||||
|
tracing::trace!("Input shape: {:?}", input.shape());
|
||||||
|
intptr.resize_tensor_by_nchw::<mnn::View<&mut f32>, _>(
|
||||||
|
input.view_mut(),
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
640,
|
||||||
|
640,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
intptr.resize_session(session);
|
||||||
|
let mut input = intptr.input::<f32>(session, "input")?;
|
||||||
|
tracing::trace!("Input shape: {:?}", input.shape());
|
||||||
|
input.copy_from_host_tensor(tensor.view())?;
|
||||||
|
|
||||||
|
tracing::info!("Running face detection session");
|
||||||
|
intptr.run_session(&session)?;
|
||||||
|
let output_tensor = intptr
|
||||||
|
.output::<f32>(&session, "bbox")?
|
||||||
|
.create_host_tensor_from_device(true)
|
||||||
|
.as_ndarray()
|
||||||
|
.to_owned();
|
||||||
|
tracing::trace!("Output Bbox: \t\t{:?}", output_tensor.shape());
|
||||||
|
let output_confidence = intptr
|
||||||
|
.output::<f32>(&session, "confidence")?
|
||||||
|
.create_host_tensor_from_device(true)
|
||||||
|
.as_ndarray::<ndarray::Ix3>()
|
||||||
|
.to_owned();
|
||||||
|
tracing::trace!("Output Confidence: \t{:?}", output_confidence.shape());
|
||||||
|
let output_landmark = intptr
|
||||||
|
.output::<f32>(&session, "landmark")?
|
||||||
|
.create_host_tensor_from_device(true)
|
||||||
|
.as_ndarray::<ndarray::Ix3>()
|
||||||
|
.to_owned();
|
||||||
|
tracing::trace!("Output Landmark: \t{:?}", output_landmark.shape());
|
||||||
|
Ok(FaceDetectionModelOutput {
|
||||||
|
bbox: output_tensor,
|
||||||
|
confidence: output_confidence,
|
||||||
|
landmark: output_landmark,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.map_err(|e| e.into_inner())
|
||||||
|
.change_context(Error)?;
|
||||||
|
Ok(output)
|
||||||
|
}
|
||||||
|
}
|
||||||
1
src/facedet/yolo.rs
Normal file
1
src/facedet/yolo.rs
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
@@ -14,7 +14,7 @@ pub fn main() -> Result<()> {
|
|||||||
match args.cmd {
|
match args.cmd {
|
||||||
cli::SubCommand::Detect(detect) => {
|
cli::SubCommand::Detect(detect) => {
|
||||||
use detector::facedet;
|
use detector::facedet;
|
||||||
let model = facedet::FaceDetection::new_from_bytes(RETINAFACE_MODEL)
|
let model = facedet::retinaface::FaceDetection::new_from_bytes(RETINAFACE_MODEL)
|
||||||
.change_context(errors::Error)
|
.change_context(errors::Error)
|
||||||
.attach_printable("Failed to create face detection model")?;
|
.attach_printable("Failed to create face detection model")?;
|
||||||
let image = image::open(detect.image).change_context(Error)?;
|
let image = image::open(detect.image).change_context(Error)?;
|
||||||
@@ -27,7 +27,9 @@ pub fn main() -> Result<()> {
|
|||||||
.detect_faces(array)
|
.detect_faces(array)
|
||||||
.change_context(errors::Error)
|
.change_context(errors::Error)
|
||||||
.attach_printable("Failed to detect faces")?;
|
.attach_printable("Failed to detect faces")?;
|
||||||
output.print(20);
|
// output.print(20);
|
||||||
|
let aabbs = output.postprocess(Default::default());
|
||||||
|
dbg!(aabbs);
|
||||||
}
|
}
|
||||||
cli::SubCommand::List(list) => {
|
cli::SubCommand::List(list) => {
|
||||||
println!("List: {:?}", list);
|
println!("List: {:?}", list);
|
||||||
|
|||||||
Reference in New Issue
Block a user