From 67f4c3671eb5c7d8694aba4874daa2fe6b1cf172 Mon Sep 17 00:00:00 2001
From: Theron <tspiegl@gmail.com>
Date: Mon, 13 Jan 2020 23:36:54 -0600
Subject: [PATCH] replaced AudioQueue, went back to a callback that uses a
 shared Vec, mostly fixing the scratchy audio though it still needs tweaking.

---
 src/apu/mod.rs | 21 ++------------
 src/audio.rs   | 74 +++++++++++++++++++++++++++++++++++++++++++-------
 src/main.rs    | 31 ++++++++-------------
 3 files changed, 79 insertions(+), 47 deletions(-)
diff --git a/src/apu/mod.rs b/src/apu/mod.rs
index 1577321..e185c8d 100644
--- a/src/apu/mod.rs
+++ b/src/apu/mod.rs
@@ -12,14 +12,9 @@ use dmc::DMC;
 // Frame counter only ticks every 3728.5 APU ticks, and in audio frames of 4 or 5.
 // Length counter controls note durations.
 
-// We need to take a sample 44100 times per second. The CPU clocks (not steps) at 1.789773 MHz. Meaning the APU, going half as fast,
-// clocks 894,886.5 times per second. 894,886.5/44,100=20.29 APU clocks per audio sample.
-
 // TODO: organize APU structs
 
 const FRAME_COUNTER_STEPS: [usize; 5] = [3728, 7456, 11185, 14914, 18640];
-const CYCLES_PER_SAMPLE: f32 = 894_886.5/44_100.0; // APU frequency over sample frequency. May need to turn this down slightly as it's outputting less than 44_100Hz.
-// const CYCLES_PER_SAMPLE: f32 = 20.0;
 const LENGTH_COUNTER_TABLE: [u8; 32] = [
     10, 254, 20,  2, 40,  4, 80,  6, 160,  8, 60, 10, 14, 12, 26, 14,
     12,  16, 24, 18, 48, 20, 96, 22, 192, 24, 72, 26, 16, 28, 32, 30,
@@ -40,7 +35,6 @@ pub struct Apu {
     interrupt_inhibit: bool,
     frame_interrupt: bool,
     cycle: usize,
-    remainder: f32, // keep sample at 44100Hz
     pub trigger_irq: bool,
 }
 
@@ -63,14 +57,11 @@ impl Apu {
             interrupt_inhibit: false,
             frame_interrupt: false,
             cycle: 0,
-            remainder: 0_f32,
             trigger_irq: false,
         }
     }
 
-    pub fn clock(&mut self) -> Option<f32> {
-        let mut sample = None;
-
+    pub fn clock(&mut self) -> f32 {
         // Clock each channel
         self.square1.clock();
         self.square2.clock();
@@ -79,13 +70,6 @@ impl Apu {
         self.noise.clock();
         self.dmc.clock();
 
-        // Send sample to buffer if necessary
-        if self.remainder > CYCLES_PER_SAMPLE { 
-            sample = Some(self.mix());
-            self.remainder -= 20.0;
-        }
-        self.remainder += 1.0;
-
         // Step frame counter if necessary
         if FRAME_COUNTER_STEPS.contains(&self.cycle) {
             self.clock_frame_counter();
@@ -95,7 +79,8 @@ impl Apu {
             self.cycle = 0;
         }
 
-        sample
+        // Send all samples to buffer, let the SDL2 audio callback take what it needs
+        self.mix()
     }
 
     fn mix(&self) -> f32 {
diff --git a/src/audio.rs b/src/audio.rs
index b3e0946..fc14e79 100644
--- a/src/audio.rs
+++ b/src/audio.rs
@@ -1,15 +1,69 @@
 extern crate sdl2;
 
-use sdl2::audio::AudioSpecDesired;
+use std::sync::{Arc, Mutex};
+use sdl2::Sdl;
+use sdl2::audio::{AudioCallback, AudioSpecDesired};
 
-pub fn initialize(context: &sdl2::Sdl) -> Result<sdl2::audio::AudioQueue<f32>, String> {
-    let audio_subsystem = context.audio()?;
+const APU_SAMPLE_RATE: f32 = 894_886.5; // raw samples per second: the APU yields one sample per APU clock
+const SDL_SAMPLE_RATE: i32 = 44_100; // output rate requested from the SDL audio device, in Hz
+// Video runs at 60Hz, so the console is clocked by doing enough work to create one frame of video, then sending the video and audio to their respective SDL
+// devices and then sleeping. So the audio device is set to play 44,100 samples per second, and to grab them in 60 batches over the course of that second.
+const SAMPLES_PER_FRAME: u16 = SDL_SAMPLE_RATE as u16/60;
 
-    let desired_spec = AudioSpecDesired {
-        freq: Some(44_100),
-        channels: Some(1), // mono
-        samples: None,     // default sample size
-    };
-
-    audio_subsystem.open_queue(None, &desired_spec)
+pub struct ApuSampler {
+    // Shared with the main loop, which appends all of the raw audio produced by the APU.
+    // The callback picks the samples it needs, when it needs them, and then discards the part of the buffer it consumed.
+    buffer: Arc<Mutex<Vec<f32>>>,
+    sample_ratio: f32, // raw APU samples per output sample; starts at APU_SAMPLE_RATE / SDL_SAMPLE_RATE and is nudged by the callback
+}
+
+impl AudioCallback for ApuSampler {
+    type Channel = f32;
+
+    /// Fills `out` with every `sample_ratio`-th raw APU sample from the shared buffer, then drops the consumed samples.
+    fn callback(&mut self, out: &mut [f32]) {
+        let mut raw = self.buffer.lock().unwrap();
+        if raw.is_empty() {
+            // SDL does not pre-initialize `out`, so an underrun has to be written out as explicit silence
+            out.iter_mut().for_each(|x| *x = 0.0);
+            println!("buffer empty!"); // happens when the callback fires twice between video frames
+            return;
+        }
+        // copy samples at the appropriate interval from the raw APU buffer to the output device
+        for (i, x) in out.iter_mut().enumerate() {
+            let sample_idx = ((i as f32) * self.sample_ratio) as usize;
+            // past the end of the raw data: hold the last available sample rather than leaving stale memory in `out`
+            *x = raw.get(sample_idx).copied().unwrap_or(raw[raw.len() - 1]);
+        }
+        // how many raw samples we would hope to have consumed; based on the slice SDL actually handed us,
+        // which may differ from the SAMPLES_PER_FRAME we requested
+        let target = (out.len() as f32 * self.sample_ratio) as usize;
+        // if we had more data than we needed, drop what we used and keep the rest in case
+        // the callback is called twice before the buffer is refilled,
+        // and raise the ratio so we step through the APU data faster and catch up with it.
+        // if we didn't have enough, lower the ratio so the next batch of raw samples stretches further
+        if raw.len() > target {
+            raw.drain(..target); // removes the consumed prefix in place, no reallocation
+            self.sample_ratio += 0.005;
+        } else {
+            raw.clear();
+            // never let the step reach zero or go negative after a run of underruns
+            self.sample_ratio = (self.sample_ratio - 0.05).max(1.0);
+        }
+    }
+}
+
+/// Opens the default playback device (mono, `SDL_SAMPLE_RATE` Hz) driven by an `ApuSampler` callback that reads from `buffer`.
+pub fn initialize(sdl_context: &Sdl, buffer: Arc<Mutex<Vec<f32>>>)
+    -> Result<sdl2::audio::AudioDevice<ApuSampler>, String>
+{
+    let wanted = AudioSpecDesired {
+        freq: Some(SDL_SAMPLE_RATE),
+        channels: Some(1), // single (mono) channel
+        samples: Some(SAMPLES_PER_FRAME), // ask for one video frame's worth of output per callback
+    };
+    sdl_context.audio()?.open_playback(None, &wanted, |obtained| {
+        println!("{:?}", obtained);
+        ApuSampler { buffer, sample_ratio: APU_SAMPLE_RATE / (SDL_SAMPLE_RATE as f32) }
+    })
 }
diff --git a/src/main.rs b/src/main.rs
index 62f2495..1fe8f90 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,3 @@
-use std::time::{Instant, Duration};
-
 mod cpu;
 mod ppu;
 mod apu;
@@ -15,6 +13,8 @@ use cartridge::get_mapper;
 use input::poll_buttons;
 use screen::{init_window, draw_pixel, draw_to_window};
 
+use std::sync::{Arc, Mutex};
+use std::time::{Instant, Duration};
 use sdl2::keyboard::Keycode;
 use sdl2::event::Event;
 use sdl2::pixels::PixelFormatEnum;
@@ -34,7 +34,10 @@ fn main() -> Result<(), String> {
     let mut screen_buffer = vec![0; byte_width * byte_height]; // contains raw RGB data for the screen
 
     // Set up audio
-    let audio_device = audio::initialize(&sdl_context).expect("Could not create audio device");
+    let mut temp_buffer = vec![]; // receives one sample each time the APU ticks. this is a staging buffer so we don't have to lock the mutex too much.
+    let apu_buffer = Arc::new(Mutex::new(Vec::<f32>::new())); // stays in this thread, receives raw samples between frames
+    let sdl_buffer = Arc::clone(&apu_buffer); // used in audio device's callback to select the samples it needs
+    let audio_device = audio::initialize(&sdl_context, sdl_buffer).expect("Could not create audio device");
     let mut half_cycle = false;
     audio_device.resume();
 
@@ -48,7 +51,6 @@ fn main() -> Result<(), String> {
     let mut timer = Instant::now();
     let mut fps_timer = Instant::now();
     let mut fps = 0;
-    let mut sps = 0;
 
     // PROFILER.lock().unwrap().start("./main.profile").unwrap();
     'running: loop {
@@ -65,14 +67,7 @@ fn main() -> Result<(), String> {
             }
         }
         for _ in 0..apu_cycles {
-            match cpu.apu.clock() {
-                Some(sample) => {
-                    sps += 1;
-                    if sps < 44_100 {audio_device.queue(&vec![sample]);} // TODO: fix this
-                    // audio_device.queue(&vec![sample]);
-                },
-                None => (),
-            };
+            temp_buffer.push(cpu.apu.clock());
         }
         // clock PPU three times for every CPU cycle
         for _ in 0..cpu_cycles * 3 {
@@ -87,6 +82,8 @@ fn main() -> Result<(), String> {
                 let now = Instant::now();
                 // if we're running faster than 60Hz, kill time
                 if now < timer + Duration::from_millis(1000/60) {
+                    let mut b = apu_buffer.lock().unwrap(); // lock the mutex guarding the real buffer (NOTE: `b` stays locked through the sleep below)
+                    b.append(&mut temp_buffer); // send this frame's audio data, emptying the temp buffer
                     std::thread::sleep(timer + Duration::from_millis(1000/60) - now);
                 }
                 timer = Instant::now();
@@ -111,9 +108,6 @@ fn main() -> Result<(), String> {
             println!("fps: {}", fps);
             fps = 0;
             fps_timer = now;
-
-            println!("samples per second: {}", sps);
-            sps = 0;
         }
     }
     // PROFILER.lock().unwrap().stop().unwrap();
@@ -125,7 +119,7 @@ fn main() -> Result<(), String> {
 TODO:
 - common mappers
 - untangle CPU and PPU
-- DMC audio channel, high- and low-pass filters, refactor envelope, fix static
+- DMC audio channel, high- and low-pass filters, refactor envelope
 - name audio variables (dividers, counters, etc.) more consistently
 - battery-backed RAM solution
 - GUI? drag and drop ROMs?
@@ -135,9 +129,8 @@ TODO:
 
 Timing notes:
 The PPU is throttled to 60Hz by sleeping in the main loop. This locks the CPU to roughly its intended speed, 1.789773MHz NTSC. The APU runs at half that.
-The SDL audio device samples/outputs at 44,100Hz, so as long as the APU queues up 44,100 samples per second, it works.
-But it's not doing so evenly. If PPU runs faster than 60Hz, audio will get skipped, and if slower, audio will pop/have gaps.
-Need to probably lock everything to the APU but worried about checking time that often. Can do for some division of 44_100.
+The APU hands every raw sample it produces (one per APU clock) to the SDL audio callback, which is asked to run 60 times per second and fill
+735 output samples (44,100/60) each time. It picks the raw samples it needs at the proper interval and then drops the part of the buffer it consumed.
 
 Failed tests from instr_test-v5/rom_singles/:
 3, immediate, Failed. Just unofficial instructions?