feat: Remove bbox crate and use 1024 for image size

uttarayan21 committed 2025-08-05 18:14:31 +05:30
parent 06fb0b4487
commit 043a845fc1
10 changed files with 89 additions and 839 deletions


@@ -7,7 +7,7 @@ use ndarray_resize::NdFir;
 use std::path::Path;
 pub struct FaceDetectionConfig {
-    min_sizes: Vec<Vector2<usize>>,
+    anchor_sizes: Vec<Vector2<usize>>,
     steps: Vec<usize>,
     variance: Vec<f32>,
     threshold: f32,
@@ -16,7 +16,7 @@ pub struct FaceDetectionConfig {
 impl FaceDetectionConfig {
     pub fn with_min_sizes(mut self, min_sizes: Vec<Vector2<usize>>) -> Self {
-        self.min_sizes = min_sizes;
+        self.anchor_sizes = min_sizes;
         self
     }
     pub fn with_steps(mut self, steps: Vec<usize>) -> Self {
@@ -40,7 +40,7 @@ impl FaceDetectionConfig {
 impl Default for FaceDetectionConfig {
     fn default() -> Self {
         FaceDetectionConfig {
-            min_sizes: vec![
+            anchor_sizes: vec![
                 Vector2::new(16, 32),
                 Vector2::new(64, 128),
                 Vector2::new(256, 512),
@@ -48,7 +48,7 @@ impl Default for FaceDetectionConfig {
             steps: vec![8, 16, 32],
             variance: vec![0.1, 0.2],
             threshold: 0.8,
-            nms_threshold: 0.6,
+            nms_threshold: 0.4,
         }
     }
 }
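Note: `threshold` here is presumably the minimum detection confidence, while `nms_threshold` is the IoU cut-off above which overlapping boxes are suppressed, so dropping it from 0.6 to 0.4 makes the suppression more aggressive. A minimal IoU helper over (x1, y1, x2, y2) boxes, for illustration only and not part of this diff:

fn iou(a: (f32, f32, f32, f32), b: (f32, f32, f32, f32)) -> f32 {
    // Intersection extents, clamped to zero when the boxes are disjoint.
    let iw = (a.2.min(b.2) - a.0.max(b.0)).max(0.0);
    let ih = (a.3.min(b.3) - a.1.max(b.1)).max(0.0);
    let inter = iw * ih;
    let area_a = (a.2 - a.0) * (a.3 - a.1);
    let area_b = (b.2 - b.0) * (b.3 - b.1);
    inter / (area_a + area_b - inter)
}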
@@ -91,15 +91,15 @@ impl FaceDetectionModelOutput {
     pub fn postprocess(self, config: &FaceDetectionConfig) -> Result<FaceDetectionProcessedOutput> {
         let mut anchors = Vec::new();
         for (k, &step) in config.steps.iter().enumerate() {
-            let feature_size = 640 / step;
-            let min_sizes = config.min_sizes[k];
+            let feature_size = 1024 / step;
+            let min_sizes = config.anchor_sizes[k];
             let sizes = [min_sizes.x, min_sizes.y];
             for i in 0..feature_size {
                 for j in 0..feature_size {
                     for &size in &sizes {
-                        let cx = (j as f32 + 0.5) * step as f32 / 640.0;
-                        let cy = (i as f32 + 0.5) * step as f32 / 640.0;
-                        let s_k = size as f32 / 640.0;
+                        let cx = (j as f32 + 0.5) * step as f32 / 1024.0;
+                        let cy = (i as f32 + 0.5) * step as f32 / 1024.0;
+                        let s_k = size as f32 / 1024.0;
                         anchors.push((cx, cy, s_k, s_k));
                     }
                 }
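For context, the hunk above regenerates the anchor grid against a 1024x1024 input instead of 640x640: each stride contributes a (1024 / step) x (1024 / step) grid of cells, with two square anchors per cell, all normalised to [0, 1]. A standalone sketch of the same loop (plain tuples instead of the `Vector2` type used here, values copied from the defaults in this diff):

fn generate_anchors() -> Vec<(f32, f32, f32, f32)> {
    let image_size = 1024.0_f32;
    let steps = [8usize, 16, 32];
    let anchor_sizes = [(16usize, 32usize), (64, 128), (256, 512)];
    let mut anchors = Vec::new();
    for (k, &step) in steps.iter().enumerate() {
        // One grid cell per `step` input pixels: 128x128, 64x64 and 32x32 grids.
        let feature_size = 1024 / step;
        let (small, large) = anchor_sizes[k];
        for i in 0..feature_size {
            for j in 0..feature_size {
                for &size in &[small, large] {
                    // Anchor centre and side length, normalised by the 1024px input size.
                    let cx = (j as f32 + 0.5) * step as f32 / image_size;
                    let cy = (i as f32 + 0.5) * step as f32 / image_size;
                    let s_k = size as f32 / image_size;
                    anchors.push((cx, cy, s_k, s_k));
                }
            }
        }
    }
    anchors
}

With these defaults the prior count grows from 2 * (80^2 + 40^2 + 20^2) = 16,800 at 640px to 2 * (128^2 + 64^2 + 32^2) = 43,008 at 1024px, which is why the session and input tensor are resized further down.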
@@ -220,7 +220,7 @@ impl FaceDetection {
         image: ndarray::Array3<u8>,
         config: FaceDetectionConfig,
     ) -> Result<FaceDetectionOutput> {
-        let (height, width, channels) = image.dim();
+        let (height, width, _channels) = image.dim();
         let output = self
             .run_models(image)
             .change_context(Error)
@@ -242,17 +242,31 @@ impl FaceDetection {
             .map(|((b, s), l)| (b, s, l))
             .multiunzip();
-        let boxes = nms(&boxes, &scores, config.threshold, config.nms_threshold);
+        let keep_indices = nms(&boxes, &scores, config.threshold, config.nms_threshold);
         let bboxes = boxes
             .into_iter()
-            .flat_map(|x| x.denormalize(factor).try_cast::<usize>())
+            .enumerate()
+            .filter(|(i, _)| keep_indices.contains(i))
+            .flat_map(|(_, x)| x.denormalize(factor).try_cast::<usize>())
             .collect();
+        let confidence = scores
+            .into_iter()
+            .enumerate()
+            .filter(|(i, _)| keep_indices.contains(i))
+            .map(|(_, score)| score)
+            .collect();
+        let landmark = landmarks
+            .into_iter()
+            .enumerate()
+            .filter(|(i, _)| keep_indices.contains(i))
+            .map(|(_, score)| score)
+            .collect();
         Ok(FaceDetectionOutput {
             bbox: bboxes,
-            confidence: processed.confidence,
-            landmark: processed.landmarks,
+            confidence,
+            landmark,
         })
     }
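The reworked post-processing above follows an index-keeping NMS pattern: `nms` now returns the indices of detections that survive suppression, and the parallel boxes/scores/landmarks vectors are each filtered against those indices so they stay aligned. A generic sketch of that pattern (the helper name is hypothetical, not part of this codebase):

fn filter_by_indices<T>(items: Vec<T>, keep: &[usize]) -> Vec<T> {
    items
        .into_iter()
        .enumerate()
        .filter(|(i, _)| keep.contains(i))
        .map(|(_, item)| item)
        .collect()
}

Since `contains` is a linear scan, a `HashSet<usize>` (or sorted indices and a single merge pass) would avoid quadratic behaviour when there are many raw detections.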
@@ -263,7 +277,7 @@ impl FaceDetection {
             .handle
             .run(move |sr| {
                 let mut resized = image
-                    .fast_resize(640, 640, None)
+                    .fast_resize(1024, 1024, None)
                     .change_context(mnn::ErrorKind::TensorError)?
                     .mapv(|f| f as f32)
                     .tap_mut(|arr| {
@@ -292,8 +306,8 @@ impl FaceDetection {
                     input.view_mut(),
                     1,
                     3,
-                    640,
-                    640,
+                    1024,
+                    1024,
                 );
             }
             intptr.resize_session(session);
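The session is now resized for a 1x3x1024x1024 input instead of 1x3x640x640. As a rough sketch of the layout work behind that tensor (the actual resize uses `fast_resize` from ndarray_resize, and whatever per-channel normalisation happens inside the `tap_mut` block is elided here), the HWC u8 image becomes an NCHW f32 array roughly like this:

use ndarray::{Array3, Array4, Axis};

// Hypothetical helper: convert an already-resized (1024, 1024, 3) HWC image
// into the (1, 3, 1024, 1024) NCHW f32 layout the session expects.
fn to_nchw(image: Array3<u8>) -> Array4<f32> {
    let hwc = image.mapv(|v| v as f32);            // (1024, 1024, 3)
    let chw = hwc.permuted_axes([2, 0, 1]);        // (3, 1024, 1024), strided view of the same data
    let chw = chw.as_standard_layout().to_owned(); // make the permuted data contiguous again
    chw.insert_axis(Axis(0))                       // (1, 3, 1024, 1024)
}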