Driver: Implement audio scheduler (#179)

This PR implements a custom scheduler for audio threads, which reduces thread use and (often) memory consumption.

To save threads and memory (e.g., packet buffer allocations), Songbird parks Mixer tasks which do not have any live Tracks.
These are now all co-located on a single async 'Idle' task.
This task is responsible for managing UDP keepalive messages for each task, maintaining event state, and executing any Mixer task messages.
Whenever any message arrives which adds a `Track`, the mixer task is moved to a live thread.
The Idle task inspects task counts and execution time on each thread, choosing the first live thread with room, and creating a new one if needed.

Each live thread is responsible for running as many live mixers as it can in a single tick every 20ms: the per-thread mixer limit currently defaults to 16, but is user-configurable.
A live thread also stores RTP packet blocks to be written into by each sub-task.
Each live thread has a conservative limit of 18ms that it will aim to stay under: if all work takes longer than this, it will offload the task with the highest mixing cost once per tick onto another (possibly new) live worker thread.
This commit is contained in:
Kyle Simpson
2023-05-21 13:50:54 +01:00
parent a5f7d3f488
commit 3daf11f5d1
34 changed files with 2828 additions and 561 deletions

View File

@@ -16,164 +16,23 @@ use songbird::{
bench_internals::{
self,
mixer::{state::InputState, Mixer},
scheduler::*,
task_message::*,
CryptoState,
},
Bitrate,
DummyMixer,
Listeners,
MockScheduler,
},
input::{cached::Compressed, codecs::*, Input, RawAdapter},
tracks,
Config,
};
use std::{io::Cursor, net::UdpSocket};
use std::{io::Cursor, net::UdpSocket, sync::Arc};
use tokio::runtime::{Handle, Runtime};
use xsalsa20poly1305::{KeyInit, XSalsa20Poly1305 as Cipher, KEY_SIZE};
// create a dummied task + interconnect.
// measure perf at varying numbers of sources (binary 1--64) without passthrough support.
/// Builds a [`Mixer`] wired to in-memory channels and a local UDP socket for benchmarking.
///
/// * `handle` - runtime handle passed straight through to `Mixer::new`.
/// * `softclip` - forwarded to `Config::use_softclip` on an otherwise default config.
///
/// Returns the mixer together with the receiving halves of the core, event, and UDP-receive
/// channels, so the caller keeps them alive (and may drain them) while the mixer runs.
///
/// # Panics
///
/// Panics if the UDP socket cannot be bound or connected.
fn dummied_mixer(
    handle: Handle,
    softclip: bool,
) -> (
    Mixer,
    (
        Receiver<CoreMessage>,
        Receiver<EventMessage>,
        Receiver<UdpRxMessage>,
    ),
) {
    let (mix_tx, mix_rx) = flume::unbounded();
    let (core_tx, core_rx) = flume::unbounded();
    let (event_tx, event_rx) = flume::unbounded();
    let (udp_receiver_tx, udp_receiver_rx) = flume::unbounded();

    let ic = Interconnect {
        core: core_tx,
        events: event_tx,
        mixer: mix_tx,
    };

    let config = Config::default().use_softclip(softclip);
    let mut out = Mixer::new(mix_rx, handle, ic, config);

    // Bind an OS-assigned local port and point it at a fixed loopback destination.
    let udp_tx = UdpSocket::bind("0.0.0.0:0").expect("Failed to create send port.");
    udp_tx
        .connect("127.0.0.1:5316")
        .expect("Failed to connect to local dest port.");

    let fake_conn = MixerConnection {
        // An all-zero key is sufficient here; a stack array avoids a throwaway heap allocation.
        cipher: Cipher::new_from_slice(&[0u8; KEY_SIZE])
            .expect("a KEY_SIZE-byte slice is always a valid key length"),
        crypto_state: CryptoState::Normal,
        udp_rx: udp_receiver_tx,
        udp_tx,
    };

    out.conn_active = Some(fake_conn);
    // NOTE(review): presumably disables real-time pacing between cycles — confirm against `Mixer`.
    out.skip_sleep = true;

    (out, (core_rx, event_rx, udp_receiver_rx))
}
/// Builds a dummied mixer preloaded with `num_tracks` identical float (raw PCM) tracks.
///
/// * `num_tracks` - how many tracks to add to the mixer.
/// * `handle` - runtime handle forwarded to `dummied_mixer`.
/// * `softclip` - forwarded to `dummied_mixer`.
///
/// Every track reads its own copy of the same generated sine buffer through a `RawAdapter`
/// (48 kHz, 2 channels) which is promoted to a parsed live input before being added.
///
/// # Panics
///
/// Panics if the adapter does not yield a live input or if promotion fails.
fn mixer_float(
    num_tracks: usize,
    handle: Handle,
    softclip: bool,
) -> (
    Mixer,
    (
        Receiver<CoreMessage>,
        Receiver<EventMessage>,
        Receiver<UdpRxMessage>,
    ),
) {
    let mut out = dummied_mixer(handle, softclip);

    let floats = utils::make_sine(10 * STEREO_FRAME_SIZE, true);

    for _ in 0..num_tracks {
        // Each track needs its own cursor over the samples, hence the per-track clone.
        let input: Input = RawAdapter::new(Cursor::new(floats.clone()), 48_000, 2).into();
        let promoted = match input {
            Input::Live(l, _) => l.promote(&CODEC_REGISTRY, &PROBE),
            _ => panic!("Failed to create a guaranteed source."),
        };

        // The track handle is not used by the benchmark; it is only held until the track is added.
        let (_handle, ctx) =
            bench_internals::track_context(Input::Live(promoted.unwrap(), None).into());
        out.0.add_track(ctx);
    }

    out
}
/// Builds a dummied mixer (soft-clip enabled) whose tracks have staggered lengths.
///
/// * `num_tracks` - how many tracks to add to the mixer.
/// * `handle` - runtime handle forwarded to `dummied_mixer`.
///
/// Track `i` is generated from `(i / 5) * STEREO_FRAME_SIZE` (integer division), so tracks come
/// in groups of five with increasing size, the first group being sized zero.
/// NOTE(review): presumably this makes tracks finish on successive cycles to exercise track
/// culling — confirm against `utils::make_sine`.
///
/// # Panics
///
/// Panics if the adapter does not yield a live input or if promotion fails.
fn mixer_float_drop(
    num_tracks: usize,
    handle: Handle,
) -> (
    Mixer,
    (
        Receiver<CoreMessage>,
        Receiver<EventMessage>,
        Receiver<UdpRxMessage>,
    ),
) {
    let mut out = dummied_mixer(handle, true);

    for i in 0..num_tracks {
        let floats = utils::make_sine((i / 5) * STEREO_FRAME_SIZE, true);
        // The buffer is built fresh for this track, so it can be moved instead of cloned.
        let input: Input = RawAdapter::new(Cursor::new(floats), 48_000, 2).into();
        let promoted = match input {
            Input::Live(l, _) => l.promote(&CODEC_REGISTRY, &PROBE),
            _ => panic!("Failed to create a guaranteed source."),
        };

        // The track handle is not used by the benchmark; it is only held until the track is added.
        let (_handle, ctx) =
            bench_internals::track_context(Input::Live(promoted.unwrap(), None).into());
        out.0.add_track(ctx);
    }

    out
}
/// Builds a dummied mixer (soft-clip disabled) holding a single Opus-compressed track.
///
/// * `handle` - runtime handle; cloned for `dummied_mixer` and also used to block on the
///   asynchronous construction of the `Compressed` source.
///
/// The compressed source is fully loaded up front (`load_all`) so that encoding cost is not
/// incurred while the benchmark is running.
///
/// # Panics
///
/// Panics if the compressed source cannot be created, if it does not convert to a live input,
/// or if promotion fails.
fn mixer_opus(
    handle: Handle,
) -> (
    Mixer,
    (
        Receiver<CoreMessage>,
        Receiver<EventMessage>,
        Receiver<UdpRxMessage>,
    ),
) {
    // should add a single opus-based track.
    // make this fully loaded to prevent any perf cost there.
    let mut out = dummied_mixer(handle.clone(), false);

    let floats = utils::make_sine(6 * STEREO_FRAME_SIZE, true);
    let input: Input = RawAdapter::new(Cursor::new(floats), 48_000, 2).into();

    let mut src = handle.block_on(async move {
        Compressed::new(input, Bitrate::BitsPerSecond(128_000))
            .await
            .expect("These parameters are well-defined.")
    });
    src.raw.load_all();

    let promoted = match src.into() {
        Input::Live(l, _) => l.promote(&CODEC_REGISTRY, &PROBE),
        _ => panic!("Failed to create a guaranteed source."),
    };

    // The track handle is not used by the benchmark; it is only held until the track is added.
    let (_handle, ctx) =
        bench_internals::track_context(Input::Live(promoted.unwrap(), None).into());
    out.0.add_track(ctx);

    out
}
fn no_passthrough(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
@@ -187,9 +46,14 @@ fn no_passthrough(c: &mut Criterion) {
&track_count,
|b, i| {
b.iter_batched_ref(
|| black_box(mixer_float(*i, rt.handle().clone(), true)),
|| {
black_box(MockScheduler::from_mixers(
None,
vec![Mixer::test_with_float(*i, rt.handle().clone(), true)],
))
},
|input| {
black_box(input.0.cycle());
black_box(input.0.core.run_once());
},
BatchSize::SmallInput,
)
@@ -200,9 +64,14 @@ fn no_passthrough(c: &mut Criterion) {
&track_count,
|b, i| {
b.iter_batched_ref(
|| black_box(mixer_float(*i, rt.handle().clone(), false)),
|| {
black_box(MockScheduler::from_mixers(
None,
vec![Mixer::test_with_float(*i, rt.handle().clone(), false)],
))
},
|input| {
black_box(input.0.cycle());
black_box(input.0.core.run_once());
},
BatchSize::SmallInput,
)
@@ -213,10 +82,71 @@ fn no_passthrough(c: &mut Criterion) {
&track_count,
|b, i| {
b.iter_batched_ref(
|| black_box(mixer_float(*i, rt.handle().clone(), true)),
|| {
black_box(MockScheduler::from_mixers(
None,
vec![Mixer::test_with_float(*i, rt.handle().clone(), true)],
))
},
|input| {
for i in 0..5 {
black_box(input.0.cycle());
black_box(input.0.core.run_once());
}
},
BatchSize::SmallInput,
)
},
);
}
group.finish();
}
fn no_passthrough_multimix(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
const N_MIXERS: usize = 16;
let mut group = c.benchmark_group(format!("Float Input (No Passthrough, {N_MIXERS} mixers)"));
for shift in 0..=2 {
let track_count = 1 << shift;
group.bench_with_input(
BenchmarkId::new("Single Packet (No Soft-Clip)", track_count),
&track_count,
|b, i| {
b.iter_batched_ref(
|| {
black_box(MockScheduler::from_mixers(
None,
(0..N_MIXERS)
.map(|_| Mixer::test_with_float(*i, rt.handle().clone(), false))
.collect(),
))
},
|input| {
black_box(input.0.core.run_once());
},
BatchSize::SmallInput,
)
},
);
group.bench_with_input(
BenchmarkId::new("n=5 Packets", track_count),
&track_count,
|b, i| {
b.iter_batched_ref(
|| {
black_box(MockScheduler::from_mixers(
None,
(0..N_MIXERS)
.map(|_| Mixer::test_with_float(*i, rt.handle().clone(), false))
.collect(),
))
},
|input| {
for i in 0..5 {
black_box(input.0.core.run_once());
}
},
BatchSize::SmallInput,
@@ -235,19 +165,29 @@ fn passthrough(c: &mut Criterion) {
group.bench_function("Single Packet", |b| {
b.iter_batched_ref(
|| black_box(mixer_opus(rt.handle().clone())),
|| {
black_box(MockScheduler::from_mixers(
None,
vec![Mixer::test_with_opus(rt.handle().clone())],
))
},
|input| {
black_box(input.0.cycle());
black_box(input.0.core.run_once());
},
BatchSize::SmallInput,
)
});
group.bench_function("n=5 Packets", |b| {
b.iter_batched_ref(
|| black_box(mixer_opus(rt.handle().clone())),
|| {
black_box(MockScheduler::from_mixers(
None,
vec![Mixer::test_with_opus(rt.handle().clone())],
))
},
|input| {
for i in 0..5 {
black_box(input.0.cycle());
black_box(input.0.core.run_once());
}
},
BatchSize::SmallInput,
@@ -257,15 +197,76 @@ fn passthrough(c: &mut Criterion) {
group.finish();
}
/// Benchmarks one scheduler core driving `N_MIXERS` Opus mixers at once.
///
/// Two benchmarks are registered per loop iteration: a single `run_once` call, and five
/// consecutive `run_once` calls. Each batch gets a freshly built `MockScheduler`.
///
/// NOTE(review): `track_count` only labels the benchmark and is passed as its input;
/// `Mixer::test_with_opus` is called without it, so the workload looks identical for
/// every label — confirm this is intended.
fn passthrough_multimix(c: &mut Criterion) {
    const N_MIXERS: usize = 16;

    let rt = Runtime::new().unwrap();

    // Batch setup shared by both benchmarks: a mock scheduler owning N_MIXERS Opus mixers.
    let build_scheduler = || {
        black_box(MockScheduler::from_mixers(
            None,
            (0..N_MIXERS)
                .map(|_| Mixer::test_with_opus(rt.handle().clone()))
                .collect(),
        ))
    };

    let mut group = c.benchmark_group(format!("Opus Input (Passthrough, {N_MIXERS} mixers)"));

    for shift in 0..=2 {
        let track_count = 1 << shift;

        group.bench_with_input(
            BenchmarkId::new("Single Packet (No Soft-Clip)", track_count),
            &track_count,
            |b, _| {
                b.iter_batched_ref(
                    build_scheduler,
                    |sched| {
                        black_box(sched.0.core.run_once());
                    },
                    BatchSize::SmallInput,
                )
            },
        );

        group.bench_with_input(
            BenchmarkId::new("n=5 Packets", track_count),
            &track_count,
            |b, _| {
                b.iter_batched_ref(
                    build_scheduler,
                    |sched| {
                        for _ in 0..5 {
                            black_box(sched.0.core.run_once());
                        }
                    },
                    BatchSize::SmallInput,
                )
            },
        );
    }

    group.finish();
}
fn culling(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
c.bench_function("Worst-case Track Culling (15 tracks, 5 pkts)", |b| {
b.iter_batched_ref(
|| black_box(mixer_float_drop(15, rt.handle().clone())),
|| {
black_box(MockScheduler::from_mixers(
None,
vec![Mixer::test_with_float_drop(15, rt.handle().clone())],
))
},
|input| {
for i in 0..5 {
black_box(input.0.cycle());
black_box(input.0.core.run_once());
}
},
BatchSize::SmallInput,
@@ -273,5 +274,69 @@ fn culling(c: &mut Criterion) {
});
}
criterion_group!(benches, no_passthrough, passthrough, culling);
criterion_main!(benches);
/// Benchmarks removal of mixer tasks from a scheduler core that holds `N_MIXERS` Opus mixers.
///
/// Three benchmarks are registered, each starting every batch from a freshly built
/// `MockScheduler`:
/// * removing the task at index 0 directly via `remove_task`;
/// * marking indices 0, 1 and 4 for culling, then calling `demote_and_remove_mixers`;
/// * calling `demote_and_remove_mixers` with nothing marked.
fn task_culling(c: &mut Criterion) {
    const N_MIXERS: usize = 8;

    let rt = Runtime::new().unwrap();

    // Batch setup shared by all three benchmarks.
    let build_scheduler = || {
        black_box(MockScheduler::from_mixers(
            None,
            (0..N_MIXERS)
                .map(|_| Mixer::test_with_opus(rt.handle().clone()))
                .collect(),
        ))
    };

    c.bench_function("Live Mixer Thread Culling", |b| {
        b.iter_batched_ref(
            build_scheduler,
            |sched| {
                black_box(sched.0.core.remove_task(0));
            },
            BatchSize::SmallInput,
        )
    });

    c.bench_function("Live Mixer Thread Culling (Practical)", |b| {
        b.iter_batched_ref(
            build_scheduler,
            |sched| {
                sched.0.core.mark_for_cull(0);
                sched.0.core.mark_for_cull(1);
                sched.0.core.mark_for_cull(4);
                sched.0.core.demote_and_remove_mixers();
                black_box(());
            },
            BatchSize::SmallInput,
        )
    });

    c.bench_function("Live Mixer Thread Culling (Practical, NoDel)", |b| {
        b.iter_batched_ref(
            build_scheduler,
            |sched| {
                black_box(sched.0.core.demote_and_remove_mixers());
            },
            BatchSize::SmallInput,
        )
    });
}
criterion_group!(individual, no_passthrough, passthrough);
criterion_group!(multimix, no_passthrough_multimix, passthrough_multimix);
criterion_group!(deletions, culling, task_culling);
criterion_main!(individual, multimix, deletions);