feat: Remove bbox crate and use 1024 for image size
This commit is contained in:
@@ -18,6 +18,7 @@ pub enum SubCommand {
|
||||
/// Model variants. Deriving `clap::ValueEnum` allows each variant to be used
/// as a command-line argument value.
#[derive(Debug, clap::ValueEnum, Clone, Copy)]
|
||||
pub enum Models {
|
||||
/// The RetinaFace variant.
RetinaFace,
|
||||
/// The Yolo variant.
Yolo,
|
||||
}
|
||||
|
||||
#[derive(Debug, clap::ValueEnum, Clone, Copy)]
|
||||
@@ -49,6 +50,8 @@ pub struct Detect {
|
||||
pub output: Option<PathBuf>,
|
||||
#[clap(short, long, default_value_t = 0.8)]
|
||||
pub threshold: f32,
|
||||
#[clap(short, long, default_value_t = 0.3)]
|
||||
pub nms_threshold: f32,
|
||||
pub image: PathBuf,
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ use ndarray_resize::NdFir;
|
||||
use std::path::Path;
|
||||
|
||||
pub struct FaceDetectionConfig {
|
||||
min_sizes: Vec<Vector2<usize>>,
|
||||
anchor_sizes: Vec<Vector2<usize>>,
|
||||
steps: Vec<usize>,
|
||||
variance: Vec<f32>,
|
||||
threshold: f32,
|
||||
@@ -16,7 +16,7 @@ pub struct FaceDetectionConfig {
|
||||
|
||||
impl FaceDetectionConfig {
|
||||
/// Builder-style setter: takes the config by value, stores the given
/// `min_sizes`, and returns the updated config.
///
/// NOTE(review): the body below assigns the same `Vec` to both
/// `self.min_sizes` and `self.anchor_sizes`. This text looks like a rendered
/// diff without +/- markers, so presumably one assignment is the removed line
/// and the other is its replacement — confirm against the repository. Taken
/// literally, the second assignment would use `min_sizes` after it was moved
/// by the first.
pub fn with_min_sizes(mut self, min_sizes: Vec<Vector2<usize>>) -> Self {
|
||||
self.min_sizes = min_sizes;
|
||||
self.anchor_sizes = min_sizes;
|
||||
self
|
||||
}
|
||||
pub fn with_steps(mut self, steps: Vec<usize>) -> Self {
|
||||
@@ -40,7 +40,7 @@ impl FaceDetectionConfig {
|
||||
impl Default for FaceDetectionConfig {
|
||||
fn default() -> Self {
|
||||
FaceDetectionConfig {
|
||||
min_sizes: vec![
|
||||
anchor_sizes: vec![
|
||||
Vector2::new(16, 32),
|
||||
Vector2::new(64, 128),
|
||||
Vector2::new(256, 512),
|
||||
@@ -48,7 +48,7 @@ impl Default for FaceDetectionConfig {
|
||||
steps: vec![8, 16, 32],
|
||||
variance: vec![0.1, 0.2],
|
||||
threshold: 0.8,
|
||||
nms_threshold: 0.6,
|
||||
nms_threshold: 0.4,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -91,15 +91,15 @@ impl FaceDetectionModelOutput {
|
||||
pub fn postprocess(self, config: &FaceDetectionConfig) -> Result<FaceDetectionProcessedOutput> {
|
||||
let mut anchors = Vec::new();
|
||||
for (k, &step) in config.steps.iter().enumerate() {
|
||||
let feature_size = 640 / step;
|
||||
let min_sizes = config.min_sizes[k];
|
||||
let feature_size = 1024 / step;
|
||||
let min_sizes = config.anchor_sizes[k];
|
||||
let sizes = [min_sizes.x, min_sizes.y];
|
||||
for i in 0..feature_size {
|
||||
for j in 0..feature_size {
|
||||
for &size in &sizes {
|
||||
let cx = (j as f32 + 0.5) * step as f32 / 640.0;
|
||||
let cy = (i as f32 + 0.5) * step as f32 / 640.0;
|
||||
let s_k = size as f32 / 640.0;
|
||||
let cx = (j as f32 + 0.5) * step as f32 / 1024.0;
|
||||
let cy = (i as f32 + 0.5) * step as f32 / 1024.0;
|
||||
let s_k = size as f32 / 1024.0;
|
||||
anchors.push((cx, cy, s_k, s_k));
|
||||
}
|
||||
}
|
||||
@@ -220,7 +220,7 @@ impl FaceDetection {
|
||||
image: ndarray::Array3<u8>,
|
||||
config: FaceDetectionConfig,
|
||||
) -> Result<FaceDetectionOutput> {
|
||||
let (height, width, channels) = image.dim();
|
||||
let (height, width, _channels) = image.dim();
|
||||
let output = self
|
||||
.run_models(image)
|
||||
.change_context(Error)
|
||||
@@ -242,17 +242,31 @@ impl FaceDetection {
|
||||
.map(|((b, s), l)| (b, s, l))
|
||||
.multiunzip();
|
||||
|
||||
let boxes = nms(&boxes, &scores, config.threshold, config.nms_threshold);
|
||||
let keep_indices = nms(&boxes, &scores, config.threshold, config.nms_threshold);
|
||||
|
||||
let bboxes = boxes
|
||||
.into_iter()
|
||||
.flat_map(|x| x.denormalize(factor).try_cast::<usize>())
|
||||
.enumerate()
|
||||
.filter(|(i, _)| keep_indices.contains(i))
|
||||
.flat_map(|(_, x)| x.denormalize(factor).try_cast::<usize>())
|
||||
.collect();
|
||||
let confidence = scores
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| keep_indices.contains(i))
|
||||
.map(|(_, score)| score)
|
||||
.collect();
|
||||
let landmark = landmarks
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| keep_indices.contains(i))
|
||||
.map(|(_, score)| score)
|
||||
.collect();
|
||||
|
||||
Ok(FaceDetectionOutput {
|
||||
bbox: bboxes,
|
||||
confidence: processed.confidence,
|
||||
landmark: processed.landmarks,
|
||||
confidence,
|
||||
landmark,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -263,7 +277,7 @@ impl FaceDetection {
|
||||
.handle
|
||||
.run(move |sr| {
|
||||
let mut resized = image
|
||||
.fast_resize(640, 640, None)
|
||||
.fast_resize(1024, 1024, None)
|
||||
.change_context(mnn::ErrorKind::TensorError)?
|
||||
.mapv(|f| f as f32)
|
||||
.tap_mut(|arr| {
|
||||
@@ -292,8 +306,8 @@ impl FaceDetection {
|
||||
input.view_mut(),
|
||||
1,
|
||||
3,
|
||||
640,
|
||||
640,
|
||||
1024,
|
||||
1024,
|
||||
);
|
||||
}
|
||||
intptr.resize_session(session);
|
||||
|
||||
@@ -34,7 +34,7 @@ pub fn main() -> Result<()> {
|
||||
for bbox in output.bbox {
|
||||
tracing::info!("Detected face: {:?}", bbox);
|
||||
use bounding_box::draw::*;
|
||||
array.draw(bbox, color::palette::css::GREEN_YELLOW.to_rgba8(), 10);
|
||||
array.draw(bbox, color::palette::css::GREEN_YELLOW.to_rgba8(), 1);
|
||||
}
|
||||
let v = array.view();
|
||||
if let Some(output) = detect.output {
|
||||
|
||||
Reference in New Issue
Block a user