Driver/Input: Migrate audio backend to Symphonia (#89)

This extensive PR rewrites the internal mixing logic of the driver to use symphonia for parsing and decoding audio data, and rubato to resample audio. Existing logic to decode DCA and Opus formats/data has been reworked as plugins for symphonia. The main benefit is that we no longer need to keep yt-dlp and ffmpeg processes alive, saving a lot of memory and CPU: all decoding can be done in Rust! In exchange, we now need to do a lot of the HTTP handling and resumption ourselves, but this is still a huge net positive. `Input`s have been completely reworked such that all default (non-cached) sources are lazy, and are no longer covered by a special-case `Restartable`. These now span a gamut from a `Compose` (lazy), to a live source, to a fully `Parsed` source. As mixing is still sync, this includes adapters for `AsyncRead`/`AsyncSeek`, and HTTP streams. `Track`s have been reworked so that they only contain initialisation state for each track. `TrackHandle`s are only created once a `Track`/`Input` has been handed over to the driver, replacing `create_player` and related functions. `TrackHandle::action` now acts on a `View` of (im)mutable state, and can request seeks/readying via `Action`. Per-track event handling has also been improved -- thanks to the new backend, we can now determine and propagate the reason behind individual track errors. Some `TrackHandle` commands (seek etc.) benefit from this, and now use internal callbacks to signal completion. Due to associated PRs on felixmcfelix/songbird from avid testers, this includes general clippy tweaks, API additions, and other repo-wide cleanup. Thanks go out to the below co-authors. Co-authored-by: Gnome! <45660393+GnomedDev@users.noreply.github.com> Co-authored-by: Alakh <36898190+alakhpc@users.noreply.github.com>
2022-07-23 23:29:02 +01:00
parent 6c6ffa7ca8
commit 8cc7a22b0b
136 changed files with 9761 additions and 4891 deletions
--- a/examples/serenity/voice_receive/src/main.rs
+++ b/examples/serenity/voice_receive/src/main.rs
@@ -12,17 +12,14 @@ use serenity::{
    async_trait,
    client::{Client, Context, EventHandler},
    framework::{
-        StandardFramework,
        standard::{
            macros::{command, group},
-            Args, CommandResult,
+            Args,
+            CommandResult,
        },
+        StandardFramework,
    },
-    model::{
-        channel::Message,
-        gateway::Ready,
-        id::ChannelId,
-    },
+    model::{channel::Message, gateway::Ready, id::ChannelId},
    prelude::{GatewayIntents, Mentionable},
    Result as SerenityResult,
 };
@@ -53,7 +50,7 @@ impl Receiver {
    pub fn new() -> Self {
        // You can manage state here, such as a buffer of audio packet bytes so
        // you can later store them in intervals.
-        Self { }
+        Self {}
    }
 }

@@ -63,9 +60,12 @@ impl VoiceEventHandler for Receiver {
    async fn act(&self, ctx: &EventContext<'_>) -> Option<Event> {
        use EventContext as Ctx;
        match ctx {
-            Ctx::SpeakingStateUpdate(
-                Speaking {speaking, ssrc, user_id, ..}
-            ) => {
+            Ctx::SpeakingStateUpdate(Speaking {
+                speaking,
+                ssrc,
+                user_id,
+                ..
+            }) => {
                // Discord voice calls use RTP, where every sender uses a randomly allocated
                // *Synchronisation Source* (SSRC) to allow receivers to tell which audio
                // stream a received packet belongs to. As this number is not derived from
@@ -79,9 +79,7 @@ impl VoiceEventHandler for Receiver {
                // to the user ID and handle their audio packets separately.
                println!(
                    "Speaking state update: user {:?} has SSRC {:?}, using {:?}",
-                    user_id,
-                    ssrc,
-                    speaking,
+                    user_id, ssrc, speaking,
                );
            },
            Ctx::SpeakingUpdate(data) => {
@@ -90,14 +88,17 @@ impl VoiceEventHandler for Receiver {
                println!(
                    "Source {} has {} speaking.",
                    data.ssrc,
-                    if data.speaking {"started"} else {"stopped"},
+                    if data.speaking { "started" } else { "stopped" },
                );
            },
            Ctx::VoicePacket(data) => {
                // An event which fires for every received audio packet,
                // containing the decoded data.
                if let Some(audio) = data.audio {
-                    println!("Audio packet's first 5 samples: {:?}", audio.get(..5.min(audio.len())));
+                    println!(
+                        "Audio packet's first 5 samples: {:?}",
+                        audio.get(..5.min(audio.len()))
+                    );
                    println!(
                        "Audio packet sequence {:05} has {:04} bytes (decompressed from {}), SSRC {}",
                        data.packet.sequence.0,
@@ -114,9 +115,7 @@ impl VoiceEventHandler for Receiver {
                // containing the call statistics and reporting information.
                println!("RTCP packet received: {:?}", data.packet);
            },
-            Ctx::ClientDisconnect(
-                ClientDisconnect {user_id, ..}
-            ) => {
+            Ctx::ClientDisconnect(ClientDisconnect { user_id, .. }) => {
                // You can implement your own logic here to handle a user who has left the
                // voice channel e.g., finalise processing of statistics etc.
                // You will typically need to map the User ID to their SSRC; observed when
@@ -127,7 +126,7 @@ impl VoiceEventHandler for Receiver {
            _ => {
                // We won't be registering this struct for any more event classes.
                unimplemented!()
-            }
+            },
        }

        None
@@ -141,24 +140,20 @@ struct General;
 #[tokio::main]
 async fn main() {
    tracing_subscriber::fmt::init();
-    
+
    // Configure the client with your Discord bot token in the environment.
-    let token = env::var("DISCORD_TOKEN")
-        .expect("Expected a token in the environment");
+    let token = env::var("DISCORD_TOKEN").expect("Expected a token in the environment");

    let framework = StandardFramework::new()
-        .configure(|c| c
-            .prefix("~"))
        .group(&GENERAL_GROUP);
+    framework.configure(|c| c.prefix("~"));

-    let intents = GatewayIntents::non_privileged()
-        | GatewayIntents::MESSAGE_CONTENT;
+    let intents = GatewayIntents::non_privileged() | GatewayIntents::MESSAGE_CONTENT;

    // Here, we need to configure Songbird to decode all incoming voice packets.
    // If you want, you can do this on a per-call basis---here, we need it to
    // read the audio data that other people are sending us!
-    let songbird_config = Config::default()
-        .decode_mode(DecodeMode::Decode);
+    let songbird_config = Config::default().decode_mode(DecodeMode::Decode);

    let mut client = Client::builder(&token, intents)
        .event_handler(Handler)
@@ -167,26 +162,33 @@ async fn main() {
        .await
        .expect("Err creating client");

-    let _ = client.start().await.map_err(|why| println!("Client ended: {:?}", why));
+    let _ = client
+        .start()
+        .await
+        .map_err(|why| println!("Client ended: {:?}", why));
 }

 #[command]
 #[only_in(guilds)]
 async fn join(ctx: &Context, msg: &Message, mut args: Args) -> CommandResult {
-    let connect_to = match args.single::<u64>() {
+    let connect_to = match args.single::<std::num::NonZeroU64>() {
        Ok(id) => ChannelId(id),
        Err(_) => {
-            check_msg(msg.reply(ctx, "Requires a valid voice channel ID be given").await);
+            check_msg(
+                msg.reply(ctx, "Requires a valid voice channel ID be given")
+                    .await,
+            );

            return Ok(());
        },
    };

-    let guild = msg.guild(&ctx.cache).unwrap();
-    let guild_id = guild.id;
+    let guild_id = msg.guild_id.unwrap();

-    let manager = songbird::get(ctx).await
-        .expect("Songbird Voice client placed in at initialisation.").clone();
+    let manager = songbird::get(ctx)
+        .await
+        .expect("Songbird Voice client placed in at initialisation.")
+        .clone();

    let (handler_lock, conn_result) = manager.join(guild_id, connect_to).await;

@@ -194,34 +196,27 @@ async fn join(ctx: &Context, msg: &Message, mut args: Args) -> CommandResult {
        // NOTE: this skips listening for the actual connection result.
        let mut handler = handler_lock.lock().await;

-        handler.add_global_event(
-            CoreEvent::SpeakingStateUpdate.into(),
-            Receiver::new(),
-        );
+        handler.add_global_event(CoreEvent::SpeakingStateUpdate.into(), Receiver::new());

-        handler.add_global_event(
-            CoreEvent::SpeakingUpdate.into(),
-            Receiver::new(),
-        );
+        handler.add_global_event(CoreEvent::SpeakingUpdate.into(), Receiver::new());

-        handler.add_global_event(
-            CoreEvent::VoicePacket.into(),
-            Receiver::new(),
-        );
+        handler.add_global_event(CoreEvent::VoicePacket.into(), Receiver::new());

-        handler.add_global_event(
-            CoreEvent::RtcpPacket.into(),
-            Receiver::new(),
-        );
+        handler.add_global_event(CoreEvent::RtcpPacket.into(), Receiver::new());

-        handler.add_global_event(
-            CoreEvent::ClientDisconnect.into(),
-            Receiver::new(),
-        );
+        handler.add_global_event(CoreEvent::ClientDisconnect.into(), Receiver::new());

-        check_msg(msg.channel_id.say(&ctx.http, &format!("Joined {}", connect_to.mention())).await);
+        check_msg(
+            msg.channel_id
+                .say(&ctx.http, &format!("Joined {}", connect_to.mention()))
+                .await,
+        );
    } else {
-        check_msg(msg.channel_id.say(&ctx.http, "Error joining the channel").await);
+        check_msg(
+            msg.channel_id
+                .say(&ctx.http, "Error joining the channel")
+                .await,
+        );
    }

    Ok(())
@@ -230,19 +225,24 @@ async fn join(ctx: &Context, msg: &Message, mut args: Args) -> CommandResult {
 #[command]
 #[only_in(guilds)]
 async fn leave(ctx: &Context, msg: &Message) -> CommandResult {
-    let guild = msg.guild(&ctx.cache).unwrap();
-    let guild_id = guild.id;
+    let guild_id = msg.guild_id.unwrap();

-    let manager = songbird::get(ctx).await
-        .expect("Songbird Voice client placed in at initialisation.").clone();
+    let manager = songbird::get(ctx)
+        .await
+        .expect("Songbird Voice client placed in at initialisation.")
+        .clone();
    let has_handler = manager.get(guild_id).is_some();

    if has_handler {
        if let Err(e) = manager.remove(guild_id).await {
-            check_msg(msg.channel_id.say(&ctx.http, format!("Failed: {:?}", e)).await);
+            check_msg(
+                msg.channel_id
+                    .say(&ctx.http, format!("Failed: {:?}", e))
+                    .await,
+            );
        }

-        check_msg(msg.channel_id.say(&ctx.http,"Left voice channel").await);
+        check_msg(msg.channel_id.say(&ctx.http, "Left voice channel").await);
    } else {
        check_msg(msg.reply(ctx, "Not in a voice channel").await);
    }
@@ -252,7 +252,7 @@ async fn leave(ctx: &Context, msg: &Message) -> CommandResult {

 #[command]
 async fn ping(ctx: &Context, msg: &Message) -> CommandResult {
-    check_msg(msg.channel_id.say(&ctx.http,"Pong!").await);
+    check_msg(msg.channel_id.say(&ctx.http, "Pong!").await);

    Ok(())
 }