Commit FS#12150 - Fully-functional audio mixer - and finally lift the old limitations on playback of voice and other sounds when paused. Channels are independent in state and amplitude. Fade on stop/pause is handled by the channel's volume control rather than the global volume, which means it now works from anywhere. Opens up the possibility of plugin sounds during music playback by merely adding an additional channel enum. If any PCM drivers were not properly modified, see one of the last comments in the task for a description of the simple change that is expected. Some params are tunable in firmware/export/pcm_mixer.h as well.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30097 a1c6a512-1295-4272-9138-f99709370657
diff --git a/apps/SOURCES b/apps/SOURCES
index c122427..075ca9a 100644
--- a/apps/SOURCES
+++ b/apps/SOURCES
@@ -170,6 +170,9 @@
 playback.c
 codecs.c
 dsp.c
+#ifndef HAVE_HARDWARE_BEEP
+beep.c
+#endif
 #ifdef HAVE_PITCHSCREEN
 tdspeed.c
 #endif
diff --git a/apps/action.c b/apps/action.c
index eb5950b..aa19403 100644
--- a/apps/action.c
+++ b/apps/action.c
@@ -205,7 +205,7 @@
     /* Produce keyclick */
     if (global_settings.keyclick && !(button & BUTTON_REL))
         if (!(button & BUTTON_REPEAT) || global_settings.keyclick_repeats)
-            pcmbuf_beep(4000, KEYCLICK_DURATION, 2500*global_settings.keyclick);
+            beep_play(4000, KEYCLICK_DURATION, 2500*global_settings.keyclick);
 #endif
 
     if ((context != last_context) && ((last_button & BUTTON_REL) == 0)
diff --git a/apps/beep.c b/apps/beep.c
new file mode 100644
index 0000000..7168472
--- /dev/null
+++ b/apps/beep.c
@@ -0,0 +1,142 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (c) 2011 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "settings.h"
+#include "dsp.h"
+#include "pcm.h"
+#include "pcm_mixer.h"
+#include "misc.h"
+
+static int32_t beep_phase;      /* Phase of square wave generator */
+static uint32_t beep_step;      /* Step of square wave generator on each sample */
+static uint32_t beep_amplitude; /* Amplitude of square wave generator */
+static int beep_count;          /* Number of samples remaining to generate */
+
+/* Reserve enough static space for keyclick to fit */
+#define BEEP_BUF_COUNT (NATIVE_FREQUENCY / 1000 * KEYCLICK_DURATION)
+static uint32_t beep_buf[BEEP_BUF_COUNT] IBSS_ATTR;
+
+/* Fill beep_buf with 'count' square-wave words; always writes at least one */
+#if defined(CPU_ARM)
+static FORCE_INLINE void beep_generate(int count)
+{
+    uint32_t *out = beep_buf;
+    uint32_t s;
+
+    asm volatile (
+    "1:                            \n"
+        "eor   %3, %5, %1, asr #31 \n" /* s = amplitude ^ (phase >> 31) */
+        "subs  %2, %2, #1          \n" /* --count, sets flags for bgt */
+        "str   %3, [%0], #4        \n" /* *out++ = s */
+        "add   %1, %1, %4          \n" /* phase += step (flags untouched) */
+        "bgt   1b                  \n" /* loop while count > 0 */
+        : "+r"(out), "+r"(beep_phase), "+r"(count), /* beep_phase updated in place */
+          "=&r"(s)
+        : "r"(beep_step), "r"(beep_amplitude)); /* NOTE(review): stores via %0, no "memory" clobber - confirm OK */
+}
+#elif defined (CPU_COLDFIRE)
+static FORCE_INLINE void beep_generate(int count)
+{
+    uint32_t *out = beep_buf;
+    uint32_t s;
+
+    asm volatile (
+    "1:                   \n"
+        "move.l %1, %3    \n" /* s = phase */
+        "add.l  %4, %1    \n" /* phase += step */
+        "add.l  %3, %3    \n" /* shift sign bit of s out into carry/extend */
+        "subx.l %3, %3    \n" /* s = s - s - X: 0 or all-ones */
+        "eor.l  %5, %3    \n" /* s ^= amplitude */
+        "move.l %3, (%0)+ \n" /* *out++ = s */
+        "subq.l #1, %2    \n" /* --count */
+        "bgt.b  1b        \n" /* loop while count > 0 */
+        : "+a"(out), "+d"(beep_phase), "+d"(count), /* beep_phase updated in place */
+          "=&d"(s)
+        : "r"(beep_step), "d"(beep_amplitude)); /* NOTE(review): stores via %0, no "memory" clobber - confirm OK */
+}
+#else
+static FORCE_INLINE void beep_generate(int count)
+{
+    uint32_t *out = beep_buf;
+    uint32_t amplitude = beep_amplitude;
+    uint32_t step = beep_step;
+    int32_t phase = beep_phase;
+
+    do
+    {
+        *out++ = (phase >> 31) ^ amplitude; /* sign of phase flips all bits of the amplitude word */
+        phase += step;
+    }
+    while (--count > 0);
+
+    beep_phase = phase; /* keep the phase so the next buffer continues the wave */
+}
+#endif
+
+/* Mixer callback: render the next chunk of the beep into beep_buf and report it
+   through *start / *size (bytes). noinline: don't inline the call in beep_play */
+static void __attribute__((noinline)) ICODE_ATTR
+beep_get_more(unsigned char **start, size_t *size)
+{
+    int count = beep_count;
+
+    if (count > 0) /* otherwise the beep is finished: *start / *size are left untouched */
+    {
+        count = MIN(count, BEEP_BUF_COUNT); /* at most one buffer per call */
+        beep_count -= count;
+        *start = (unsigned char *)beep_buf;
+        *size = count * sizeof(uint32_t);
+        beep_generate(count);
+    }
+}
+
+/* Play a square wave beep: frequency in Hertz, duration in milliseconds, amplitude
+   clamped to INT16_MAX. Stops any beep in progress; a zero argument only stops. */
+void beep_play(unsigned int frequency, unsigned int duration,
+               unsigned int amplitude)
+{
+    mixer_channel_stop(PCM_MIXER_CHAN_BEEP);
+
+    if (frequency == 0 || duration == 0 || amplitude == 0)
+        return;
+
+    if (amplitude > INT16_MAX)
+        amplitude = INT16_MAX;
+
+    /* Setup the parameters for the square wave generator */
+    beep_phase = 0;
+    beep_step = 0xffffffffu / NATIVE_FREQUENCY * frequency; /* phase increment per sample */
+    beep_count = NATIVE_FREQUENCY / 1000 * duration; /* total samples to generate */
+    beep_amplitude = amplitude | (amplitude << 16); /* Word:|AMP16|AMP16| */
+
+    /* If the whole beep fits in beep_buf, no callback is needed (NULL below) */
+    unsigned char *start;
+    size_t size;
+
+    /* Generate the first buffer here; this also deducts it from beep_count */
+    beep_get_more(&start, &size);
+
+    mixer_channel_set_amplitude(PCM_MIXER_CHAN_BEEP, MIX_AMP_UNITY);
+    mixer_channel_play_data(PCM_MIXER_CHAN_BEEP,
+                            beep_count ? beep_get_more : NULL, /* samples still left? */
+                            start, size);
+}
diff --git a/apps/gui/wps.c b/apps/gui/wps.c
index e686fcc..cbf003a 100644
--- a/apps/gui/wps.c
+++ b/apps/gui/wps.c
@@ -121,9 +121,11 @@
 
 void pause_action(bool may_fade, bool updatewps)
 {
+#if CONFIG_CODEC != SWCODEC
     if (may_fade && global_settings.fade_on_stop)
         fade(false, updatewps);
     else
+#endif
         audio_pause();
 
     if (global_settings.pause_rewind) {
@@ -136,16 +138,22 @@
             - global_settings.pause_rewind * 1000;
         audio_ff_rewind(newpos > 0 ? newpos : 0);
     }
+
+    (void)may_fade; (void)updatewps;
 }
 
 void unpause_action(bool may_fade, bool updatewps)
 {
+#if CONFIG_CODEC != SWCODEC
     if (may_fade && global_settings.fade_on_stop)
         fade(true, updatewps);
     else
+#endif
         audio_resume();
+    (void)may_fade; (void)updatewps;
 }        
 
+#if CONFIG_CODEC != SWCODEC
 void fade(bool fade_in, bool updatewps)
 {
     int fp_global_vol = global_settings.volume << 8;
@@ -204,6 +212,7 @@
         sound_set_volume(global_settings.volume);
     }
 }
+#endif /* CONFIG_CODEC != SWCODEC */
 
 static bool update_onvol_change(enum screen_type screen)
 {
@@ -569,7 +578,7 @@
     {
 #if CONFIG_CODEC == SWCODEC
         if(global_settings.beep)
-            pcmbuf_beep(1000, 150, 1500*global_settings.beep);
+            beep_play(1000, 150, 1500*global_settings.beep);
 #endif
         return;
     }
@@ -1127,9 +1136,12 @@
             status_set_record(false);
             status_set_audio(false);
 #endif
+#if CONFIG_CODEC != SWCODEC
             if (global_settings.fade_on_stop)
                 fade(false, true);
-
+#else
+            audio_pause();
+#endif
             if (bookmark)
                 bookmark_autobookmark(true);
             audio_stop();
diff --git a/apps/misc.c b/apps/misc.c
index 1f945c5..a0817d7 100644
--- a/apps/misc.c
+++ b/apps/misc.c
@@ -297,12 +297,13 @@
             splashf(0, "%s %s", str(LANG_WARNING_BATTERY_EMPTY),
                                 str(LANG_SHUTTINGDOWN));
         }
-
+#if CONFIG_CODEC != SWCODEC
         if (global_settings.fade_on_stop
             && (audio_stat & AUDIO_STATUS_PLAY))
         {
             fade(false, false);
         }
+#endif
 
         if (batt_safe) /* do not save on critical battery */
         {
@@ -380,8 +381,10 @@
     {
         if (!global_settings.party_mode)
         {
+#if CONFIG_CODEC != SWCODEC
             if (global_settings.fade_on_stop)
                 fade(false, false);
+#endif
             bookmark_autobookmark(true);
             audio_stop();
             ret = true;  /* bookmarking can make a refresh necessary */
diff --git a/apps/misc.h b/apps/misc.h
index 0b155db..c3c52d1 100644
--- a/apps/misc.h
+++ b/apps/misc.h
@@ -100,6 +100,9 @@
 #endif
 #endif
 
+void beep_play(unsigned int frequency, unsigned int duration,
+               unsigned int amplitude);
+
 enum current_activity {
     ACTIVITY_UNKNOWN = 0,
     ACTIVITY_MAINMENU,
diff --git a/apps/pcmbuf.c b/apps/pcmbuf.c
index 7201d39..f2f94e3 100644
--- a/apps/pcmbuf.c
+++ b/apps/pcmbuf.c
@@ -23,8 +23,9 @@
 #include "system.h"
 #include "debug.h"
 #include <kernel.h>
-#include "pcmbuf.h"
 #include "pcm.h"
+#include "pcm_mixer.h"
+#include "pcmbuf.h"
 #include "playback.h"
 #include "codec_thread.h"
 
@@ -49,9 +50,6 @@
 #define PCMBUF_MIN_CHUNK     4096 /* We try to never feed a chunk smaller than
                                      this to the DMA */
 #define CROSSFADE_BUFSIZE    8192 /* Size of the crossfade buffer */
-#define AUX_BUFSIZE           512 /* Size of the aux buffer; can be 512 if no
-                                     resampling or timestretching is allowed in
-                                     the aux channel, must be 2048 otherwise */
 
 /* number of bytes played per second (sample rate * 2 channels * 2 bytes/sample) */
 #define BYTERATE            (NATIVE_FREQUENCY * 4)
@@ -91,6 +89,12 @@
 /* Gapless playback */
 static bool track_transition IDATA_ATTR;
 
+/* Fade effect */
+static unsigned int fade_vol = MIX_AMP_UNITY;
+
+/* Voice */
+static bool soft_mode = false;
+
 #ifdef HAVE_CROSSFADE
 /* Crossfade buffer */
 static char *fadebuf IDATA_ATTR;
@@ -121,11 +125,6 @@
 static size_t pcmbuf_unplayed_bytes IDATA_ATTR;
 static size_t pcmbuf_watermark IDATA_ATTR;
 
-/* Voice */
-static char *voicebuf IDATA_ATTR;
-static struct chunkdesc *mix_chunk IDATA_ATTR;
-static size_t pcmbuf_mix_sample IDATA_ATTR;
-
 static bool low_latency_mode = false;
 static bool flush_pcmbuf = false;
 
@@ -317,10 +316,12 @@
  * Also maintain buffer level above the watermark. */
 static bool prepare_insert(size_t length)
 {
+    bool playing = mixer_channel_status(PCM_MIXER_CHAN_PLAYBACK) != CHANNEL_STOPPED;
+
     if (low_latency_mode)
     {
         /* 1/4s latency. */
-        if (!LOW_DATA(1) && pcm_is_playing())
+        if (!LOW_DATA(1) && playing)
             return false;
     }
 
@@ -329,7 +330,7 @@
         return false;
 
     /* Maintain the buffer level above the watermark */
-    if (pcm_is_playing())
+    if (playing)
     {
         /* Only codec thread initiates boost - voice boosts the cpu when playing
            a clip */
@@ -351,7 +352,7 @@
         }
 #endif
     }
-    else    /* pcm_is_playing */
+    else    /* !playing */
     {
         /* Boost CPU for pre-buffer */
         trigger_cpu_boost();
@@ -469,11 +470,14 @@
  * ...|---------PCMBUF---------|FADEBUF|VOICEBUF|DESCS|... */
 size_t pcmbuf_init(unsigned char *bufend)
 {
+    unsigned char *voicebuf;
+
     pcmbuf_bufend = bufend;
     pcmbuf_size = get_next_required_pcmbuf_size();
     write_chunk = (struct chunkdesc *)pcmbuf_bufend -
         NUM_CHUNK_DESCS(pcmbuf_size);
-    voicebuf = (char *)write_chunk - AUX_BUFSIZE;
+    voicebuf = (unsigned char *)write_chunk -
+                voicebuf_init((unsigned char *)write_chunk);
 #ifdef HAVE_CROSSFADE
     fadebuf = voicebuf - CROSSFADE_BUFSIZE;
     pcmbuffer = fadebuf - pcmbuf_size;
@@ -491,6 +495,8 @@
 
     pcmbuf_play_stop();
 
+    pcmbuf_soft_mode(false);
+
     return pcmbuf_bufend - pcmbuffer;
 }
 
@@ -572,7 +578,7 @@
 #ifdef HAVE_CROSSFADE
             pcmbuf_is_crossfade_active() ||
 #endif
-            !pcm_is_playing())
+            mixer_channel_status(PCM_MIXER_CHAN_PLAYBACK) == CHANNEL_STOPPED)
         {
             pcmbuf_play_stop();
             pcm_play_unlock();
@@ -652,10 +658,6 @@
         write_end_chunk->link = pcmbuf_current;
         write_end_chunk = pcmbuf_current;
 
-        /* If we've read over the mix chunk while it's still mixing there */
-        if (pcmbuf_current == mix_chunk)
-            mix_chunk = NULL;
-
 #ifdef HAVE_CROSSFADE
         /* If we've read over the crossfade chunk while it's still fading */
         if (pcmbuf_current == crossfade_chunk)
@@ -696,23 +698,23 @@
 /* Force playback */
 void pcmbuf_play_start(void)
 {
-    if (!pcm_is_playing() && pcmbuf_unplayed_bytes && read_chunk != NULL)
+    if (mixer_channel_status(PCM_MIXER_CHAN_PLAYBACK) == CHANNEL_STOPPED &&
+        pcmbuf_unplayed_bytes && read_chunk != NULL)
     {
         logf("pcmbuf_play_start");
         last_chunksize = read_chunk->size;
         pcmbuf_unplayed_bytes -= last_chunksize;
-        pcm_play_data(pcmbuf_pcm_callback,
-            read_chunk->addr, last_chunksize);
+        mixer_channel_play_data(PCM_MIXER_CHAN_PLAYBACK,
+                                pcmbuf_pcm_callback, NULL, 0);
     }
 }
 
 void pcmbuf_play_stop(void)
 {
     logf("pcmbuf_play_stop");
-    pcm_play_stop();
+    mixer_channel_stop(PCM_MIXER_CHAN_PLAYBACK);
 
     pcmbuf_unplayed_bytes = 0;
-    mix_chunk = NULL;
     if (read_chunk) {
         write_end_chunk->link = read_chunk;
         write_end_chunk = read_end_chunk;
@@ -737,8 +739,9 @@
 void pcmbuf_pause(bool pause)
 {
     logf("pcmbuf_pause: %s", pause?"pause":"play");
-    if (pcm_is_playing())
-        pcm_play_pause(!pause);
+
+    if (mixer_channel_status(PCM_MIXER_CHAN_PLAYBACK) != CHANNEL_STOPPED)
+        mixer_channel_play_pause(PCM_MIXER_CHAN_PLAYBACK, !pause);
     else if (!pause)
         pcmbuf_play_start();
 }
@@ -1031,102 +1034,6 @@
 #endif /* HAVE_CROSSFADE */
 
 
-/** Voice */
-
-/* Returns pcm buffer usage in percents (0 to 100). */
-static int pcmbuf_usage(void)
-{
-    return pcmbuf_unplayed_bytes * 100 / pcmbuf_size;
-}
-
-static int pcmbuf_mix_free(void)
-{
-    if (mix_chunk)
-    {
-        size_t my_mix_end =
-            (size_t)&((int16_t *)mix_chunk->addr)[pcmbuf_mix_sample];
-        size_t my_write_pos = (size_t)&pcmbuffer[pcmbuffer_pos];
-        if (my_write_pos < my_mix_end)
-            my_write_pos += pcmbuf_size;
-        return (my_write_pos - my_mix_end) * 100 / pcmbuf_unplayed_bytes;
-    }
-    return 100;
-}
-
-void *pcmbuf_request_voice_buffer(int *count)
-{
-    /* A get-it-to-work-for-now hack (audio status could change by
-       completion) */
-    if (audio_status() & AUDIO_STATUS_PLAY)
-    {
-        if (read_chunk == NULL)
-        {
-            return NULL;
-        }
-        else if (pcmbuf_usage() >= 10 && pcmbuf_mix_free() >= 30 &&
-                 (mix_chunk || read_chunk->link))
-        {
-            *count = MIN(*count, AUX_BUFSIZE/4);
-            return voicebuf;
-        }
-        else
-        {
-            return NULL;
-        }
-    }
-    else
-    {
-        return pcmbuf_request_buffer(count);
-    }
-}
-
-void pcmbuf_write_voice_complete(int count)
-{
-    /* A get-it-to-work-for-now hack (audio status could have changed) */
-    if (!(audio_status() & AUDIO_STATUS_PLAY))
-    {
-        pcmbuf_write_complete(count);
-        return;
-    }
-
-    int16_t *ibuf = (int16_t *)voicebuf;
-    int16_t *obuf;
-    size_t chunk_samples;
-
-    if (mix_chunk == NULL && read_chunk != NULL)
-    {
-        mix_chunk = read_chunk->link;
-        /* Start 1/8s into the next chunk */
-        pcmbuf_mix_sample = BYTERATE / 16;
-    }
-
-    if (!mix_chunk)
-        return;
-
-    obuf = (int16_t *)mix_chunk->addr;
-    chunk_samples = mix_chunk->size / sizeof (int16_t);
-
-    count <<= 1;
-
-    while (count-- > 0)
-    {
-        int32_t sample = *ibuf++;
-
-        if (pcmbuf_mix_sample >= chunk_samples)
-        {
-            mix_chunk = mix_chunk->link;
-            if (!mix_chunk)
-                return;
-            pcmbuf_mix_sample = 0;
-            obuf = (int16_t *)mix_chunk->addr;
-            chunk_samples = mix_chunk->size / 2;
-        }
-        sample += obuf[pcmbuf_mix_sample] >> 2;
-        obuf[pcmbuf_mix_sample++] = clip_sample_16(sample);
-    }
-}
-
-
 /** Debug menu, other metrics */
 
 /* Amount of bytes left in the buffer. */
@@ -1174,6 +1081,71 @@
 #endif
 
 
+/** Fading and channel volume control */
+
+/* Apply fade_vol to the playback channel, quartered while soft_mode is set */
+static void pcmbuf_update_volume(void)
+{
+    unsigned int vol = fade_vol;
+
+    if (soft_mode)
+        vol >>= 2; /* 1/4 of the fade level */
+
+    mixer_channel_set_amplitude(PCM_MIXER_CHAN_PLAYBACK, vol);
+}
+
+/* Quiet down the playback channel if 'shhh' is true, else play at the normal level */
+void pcmbuf_soft_mode(bool shhh)
+{
+    soft_mode = shhh;
+    pcmbuf_update_volume(); /* takes effect immediately */
+}
+
+/* Set (fade == false) or ramp (fade == true) the playback level up ('in') or down */
+void pcmbuf_fade(bool fade, bool in)
+{
+    if (!fade)
+    {
+        /* Simply set the final level */
+        fade_vol = in ? MIX_AMP_UNITY : MIX_AMP_MUTE;
+    }
+    else
+    {
+        /* Start from the opposing end */
+        fade_vol = in ? MIX_AMP_MUTE : MIX_AMP_UNITY;
+    }
+
+    pcmbuf_update_volume();
+
+    if (fade)
+    {
+        /* Do this on the calling thread for now, temporarily at realtime priority */
+        int old_prio = thread_set_priority(thread_self(), PRIORITY_REALTIME);
+
+        while (1)
+        {
+            /* Linear fade actually sounds better; step is 1/16 of unity, clamped */
+            if (in)
+                fade_vol += MIN(MIX_AMP_UNITY/16, MIX_AMP_UNITY - fade_vol);
+            else
+                fade_vol -= MIN(MIX_AMP_UNITY/16, fade_vol - MIX_AMP_MUTE);
+
+            pcmbuf_update_volume();
+
+            if (fade_vol > MIX_AMP_MUTE && fade_vol < MIX_AMP_UNITY) /* not at an end point yet */
+            {
+                sleep(0);
+                continue;
+            }
+
+            break;
+        }
+
+        thread_set_priority(thread_self(), old_prio); /* restore the caller's priority */
+    }
+}
+
+
 /** Misc */
 
 bool pcmbuf_is_lowdata(void)
@@ -1201,107 +1173,6 @@
 
 unsigned long pcmbuf_get_latency(void)
 {
-    return (pcmbuf_unplayed_bytes + pcm_get_bytes_waiting()) * 1000 / BYTERATE;
+    return (pcmbuf_unplayed_bytes +
+        mixer_channel_get_bytes_waiting(PCM_MIXER_CHAN_PLAYBACK)) * 1000 / BYTERATE;
 }
-
-#ifndef HAVE_HARDWARE_BEEP
-#define MINIBUF_SAMPLES (NATIVE_FREQUENCY / 1000 * KEYCLICK_DURATION)
-#define MINIBUF_SIZE (MINIBUF_SAMPLES*4)
-
-/* Generates a constant square wave sound with a given frequency
-   in Hertz for a duration in milliseconds. */
-void pcmbuf_beep(unsigned int frequency, size_t duration, int amplitude)
-{
-    unsigned int step = 0xffffffffu / NATIVE_FREQUENCY * frequency;
-    int32_t phase = 0;
-    int16_t *bufptr, *bufstart, *bufend;
-    int32_t sample;
-    int nsamples = NATIVE_FREQUENCY / 1000 * duration;
-    bool mix = read_chunk != NULL && read_chunk->link != NULL;
-    int i;
-
-    bufend = SKIPBYTES((int16_t *)pcmbuffer, pcmbuf_size);
-
-    /* Find the insertion point and set bufstart to the start of it */
-    if (mix)
-    {
-        /* Get the currently playing chunk at the current position. */
-        bufstart = (int16_t *)pcm_play_dma_get_peak_buffer(&i);
-
-        /* If above isn't implemented or pcm is stopped, no beepeth. */
-        if (!bufstart || !pcm_is_playing())
-            return;
-
-        /* Give 5ms clearance. */
-        bufstart += BYTERATE / 200;
-
-#ifdef HAVE_PCM_DMA_ADDRESS
-        /* Returned peak addresses are DMA addresses */
-        bufend = pcm_dma_addr(bufend);
-#endif
-
-        /* Wrapped above? */
-        if (bufstart >= bufend)
-            bufstart -= pcmbuf_size;
-
-        /* NOTE: On some targets using hardware DMA, cache range flushing may
-         * be required or the writes may not be picked up by the controller.
-         * An incremental flush should be done periodically during the mixdown. */
-    }
-    else if (nsamples <= MINIBUF_SAMPLES)
-    {
-        static int16_t minibuf[MINIBUF_SAMPLES*2] __attribute__((aligned(4)));
-        /* Use mini buffer */
-        bufstart = minibuf;
-        bufend = SKIPBYTES(bufstart, MINIBUF_SIZE);
-    }
-    else if (!audio_buffer_state_trashed())
-    {
-        /* Use pcmbuffer */
-        bufstart = (int16_t *)pcmbuffer;
-    }
-    else
-    {
-        /* No place */
-        return;
-    }
-
-    bufptr = bufstart;
-
-    /* Mix square wave into buffer */
-    for (i = 0; i < nsamples; ++i)
-    {
-        int32_t amp = (phase >> 31) ^ (int32_t)amplitude;
-        sample = mix ? *bufptr : 0;
-        *bufptr++ = clip_sample_16(sample + amp);
-        if (bufptr >= bufend)
-            bufptr = (int16_t *)pcmbuffer;
-        sample = mix ? *bufptr : 0;
-        *bufptr++ = clip_sample_16(sample + amp);
-        if (bufptr >= bufend)
-            bufptr = (int16_t *)pcmbuffer;
-
-        phase += step;
-    }
-
-    pcm_play_lock();
-#ifdef HAVE_RECORDING
-    pcm_rec_lock();
-#endif
-
-    /* Kick off playback if required and it won't interfere */
-    if (!pcm_is_playing()
-#ifdef HAVE_RECORDING
-        && !pcm_is_recording()
-#endif
-        )
-    {
-        pcm_play_data(NULL, (unsigned char *)bufstart, nsamples * 4);
-    }
-
-    pcm_play_unlock();
-#ifdef HAVE_RECORDING
-    pcm_rec_unlock();
-#endif
-}
-#endif /* HAVE_HARDWARE_BEEP */
diff --git a/apps/pcmbuf.h b/apps/pcmbuf.h
index b7bf8c2..b7f5a3c 100644
--- a/apps/pcmbuf.h
+++ b/apps/pcmbuf.h
@@ -64,9 +64,10 @@
 #endif
 
 /* Misc */
+void pcmbuf_fade(bool fade, bool in);
+void pcmbuf_soft_mode(bool shhh);
 bool pcmbuf_is_lowdata(void);
 void pcmbuf_set_low_latency(bool state);
 unsigned long pcmbuf_get_latency(void);
-void pcmbuf_beep(unsigned int frequency, size_t duration, int amplitude);
 
 #endif
diff --git a/apps/playback.c b/apps/playback.c
index 2775e8a..cbb94a9 100644
--- a/apps/playback.c
+++ b/apps/playback.c
@@ -39,6 +39,7 @@
 #include "abrepeat.h"
 #include "pcmbuf.h"
 #include "playback.h"
+#include "misc.h"
 
 #ifdef HAVE_TAGCACHE
 #include "tagcache.h"
@@ -2360,6 +2361,9 @@
 #ifndef PLATFORM_HAS_VOLUME_CHANGE
         sound_set_volume(global_settings.volume);
 #endif
+        /* Be sure channel is audible */
+        pcmbuf_fade(false, true);
+
         /* Update our state */
         play_status = PLAY_PLAYING;
     }
@@ -2413,6 +2417,8 @@
     if (play_status == PLAY_STOPPED)
         return;
 
+    pcmbuf_fade(global_settings.fade_on_stop, false);
+
     /* Stop the codec and unload it */
     halt_decoding_track(true);
     pcmbuf_play_stop();
@@ -2452,6 +2458,11 @@
     if (play_status == PLAY_STOPPED || pause == (play_status == PLAY_PAUSED))
         return;
 
+    bool const do_fade = global_settings.fade_on_stop;
+
+    if (pause)
+        pcmbuf_fade(do_fade, false);
+
     if (!ff_rw_mode)
     {
         /* Not in ff/rw mode - may set the state (otherwise this could make
@@ -2459,6 +2470,9 @@
         pcmbuf_pause(pause);
     }
 
+    if (!pause)
+        pcmbuf_fade(do_fade, true);
+
     play_status = pause ? PLAY_PAUSED : PLAY_PLAYING;
 
     if (!pause && codec_skip_pending)
@@ -3170,7 +3184,7 @@
 /* May pcmbuf start PCM playback when the buffer is full enough? */
 bool audio_pcmbuf_may_play(void)
 {
-    return play_status != PLAY_PAUSED && !ff_rw_mode;
+    return play_status == PLAY_PLAYING && !ff_rw_mode;
 }
 
 
@@ -3339,7 +3353,7 @@
         skip_offset = accum;
 
         if (global_settings.beep)
-            pcmbuf_beep(2000, 100, 2500*global_settings.beep);
+            beep_play(2000, 100, 2500*global_settings.beep);
 
         LOGFQUEUE("audio > audio Q_AUDIO_SKIP %d", offset);
 
@@ -3360,7 +3374,7 @@
     {
         /* No more tracks */
         if (global_settings.beep)
-            pcmbuf_beep(1000, 100, 1500*global_settings.beep);
+            beep_play(1000, 100, 1500*global_settings.beep);
     }
 
     id3_mutex_unlock();
diff --git a/apps/plugin.c b/apps/plugin.c
index d9f7c4e..10cb926 100644
--- a/apps/plugin.c
+++ b/apps/plugin.c
@@ -551,7 +551,7 @@
     pcm_get_peak_buffer,
     pcm_play_lock,
     pcm_play_unlock,
-    pcmbuf_beep,
+    beep_play,
 #ifdef HAVE_RECORDING
     &rec_freq_sampr[0],
     pcm_init_recording,
@@ -778,6 +778,9 @@
 
     /* new stuff at the end, sort into place next time
        the API gets incompatible */
+
+    mixer_channel_status,
+    mixer_channel_get_buffer,
 };
 
 int plugin_load(const char* plugin, const void* parameter)
diff --git a/apps/plugin.h b/apps/plugin.h
index f15c626..113296c 100644
--- a/apps/plugin.h
+++ b/apps/plugin.h
@@ -65,6 +65,7 @@
 #include "misc.h"
 #include "filefuncs.h"
 #if (CONFIG_CODEC == SWCODEC)
+#include "pcm_mixer.h"
 #include "dsp.h"
 #include "codecs.h"
 #include "playback.h"
@@ -145,7 +146,7 @@
 #define PLUGIN_MAGIC 0x526F634B /* RocK */
 
 /* increase this every time the api struct changes */
-#define PLUGIN_API_VERSION 205
+#define PLUGIN_API_VERSION 206
 
 /* update this to latest version if a change to the api struct breaks
    backwards compatibility (and please take the opportunity to sort in any
@@ -635,9 +636,8 @@
     const void* (*pcm_get_peak_buffer)(int *count);
     void (*pcm_play_lock)(void);
     void (*pcm_play_unlock)(void);
-    void (*pcmbuf_beep)(unsigned int frequency,
-                        size_t duration,
-                        int amplitude);
+    void (*beep_play)(unsigned int frequency, unsigned int duration,
+                      unsigned int amplitude);
 #ifdef HAVE_RECORDING
     const unsigned long *rec_freq_sampr;
     void (*pcm_init_recording)(void);
@@ -908,6 +908,8 @@
 
     /* new stuff at the end, sort into place next time
        the API gets incompatible */
+    enum channel_status (*mixer_channel_status)(enum pcm_mixer_channel channel);
+    void * (*mixer_channel_get_buffer)(enum pcm_mixer_channel channel, int *count);
 };
 
 /* plugin header */
diff --git a/apps/plugins/fft/fft.c b/apps/plugins/fft/fft.c
index b6b1e2f..a920f8c 100644
--- a/apps/plugins/fft/fft.c
+++ b/apps/plugins/fft/fft.c
@@ -1137,6 +1137,10 @@
 /********************* End of plotting functions (modes) *********************/
 
 /****************************** FFT functions ********************************/
+static bool is_playing(void)
+{   /* True only while the playback mixer channel reports CHANNEL_PLAYING */
+    return rb->mixer_channel_status(PCM_MIXER_CHAN_PLAYBACK) == CHANNEL_PLAYING;
+}
 
 /** functions use in single/multi configuration **/
 static inline bool fft_init_fft_lib(void)
@@ -1156,7 +1160,8 @@
 static inline bool fft_get_fft(void)
 {
     int count;
-    int16_t *value = (int16_t *) rb->pcm_get_peak_buffer(&count);
+    int16_t *value =
+        (int16_t *) rb->mixer_channel_get_buffer(PCM_MIXER_CHAN_PLAYBACK, &count);
     /* This block can introduce discontinuities in our data. Meaning, the
      * FFT will not be done a continuous segment of the signal. Which can
      * be bad. Or not.
@@ -1214,7 +1219,7 @@
 
     while(fft_thread_run)
 	{
-        if (!rb->pcm_is_playing())
+        if (!is_playing())
         {
             rb->sleep(HZ/5);
             continue;
@@ -1296,7 +1301,7 @@
  * target uses IRAM */
 static bool fft_have_fft(void)
 {
-    return rb->pcm_is_playing() && fft_get_fft();
+    return is_playing() && fft_get_fft();
 }
 
 static inline void fft_free_fft_output(void)
@@ -1366,7 +1371,7 @@
 		{
             int timeout;
 
-            if(!rb->pcm_is_playing())
+            if(!is_playing())
             {
                 showing_warning = true;
                 mylcd_clear_display();
diff --git a/apps/recorder/keyboard.c b/apps/recorder/keyboard.c
index 1b2d76e..5f2a32c 100644
--- a/apps/recorder/keyboard.c
+++ b/apps/recorder/keyboard.c
@@ -1231,7 +1231,7 @@
         state->editpos -= dir;
 #if CONFIG_CODEC == SWCODEC
         if (global_settings.talk_menu)
-            pcmbuf_beep(1000, 150, 1500);
+            beep_play(1000, 150, 1500);
 #endif
     }
 }
diff --git a/apps/voice_thread.c b/apps/voice_thread.c
index 6683fcc..3318bbe 100644
--- a/apps/voice_thread.c
+++ b/apps/voice_thread.c
@@ -27,6 +27,8 @@
 #include "audio.h"
 #include "playback.h"
 #include "pcmbuf.h"
+#include "pcm.h"
+#include "pcm_mixer.h"
 #include "codecs/libspeex/speex/speex.h"
 
 /* Define any of these as "1" and uncomment the LOGF_ENABLE line to log
@@ -53,24 +55,50 @@
 #define IBSS_ATTR_VOICE_STACK IBSS_ATTR
 #endif
 
+/* Minimum priority needs to be a bit elevated since voice has fairly low
+   latency */
+#define PRIORITY_VOICE (PRIORITY_PLAYBACK-4)
+
 #define VOICE_FRAME_SIZE    320 /* Samples / frame */
 #define VOICE_SAMPLE_RATE 16000 /* Sample rate in HZ */
 #define VOICE_SAMPLE_DEPTH   16 /* Sample depth in bits */
 
 /* Voice thread variables */
 static unsigned int voice_thread_id = 0;
-static long voice_stack[(DEFAULT_STACK_SIZE + 0x3C0)/sizeof(long)] IBSS_ATTR_VOICE_STACK;
+#ifdef CPU_COLDFIRE
+/* ISR uses any available stack - need a bit more room */
+#define VOICE_STACK_EXTRA   0x400
+#else
+#define VOICE_STACK_EXTRA   0x3c0
+#endif
+static long voice_stack[(DEFAULT_STACK_SIZE + VOICE_STACK_EXTRA)/sizeof(long)]
+    IBSS_ATTR_VOICE_STACK;
 static const char voice_thread_name[] = "voice";
 
 /* Voice thread synchronization objects */
 static struct event_queue voice_queue SHAREDBSS_ATTR;
-static struct mutex voice_mutex SHAREDBSS_ATTR;
 static struct queue_sender_list voice_queue_sender_list SHAREDBSS_ATTR;
 static bool voice_done SHAREDDATA_ATTR = true;
 
 /* Buffer for decoded samples */
 static spx_int16_t voice_output_buf[VOICE_FRAME_SIZE] CACHEALIGN_ATTR;
 
+#define VOICE_PCM_FRAME_COUNT   ((NATIVE_FREQUENCY*VOICE_FRAME_SIZE + \
+                                 VOICE_SAMPLE_RATE) / VOICE_SAMPLE_RATE)
+#define VOICE_PCM_FRAME_SIZE    (VOICE_PCM_FRAME_COUNT*4)
+
+/* Default number of native-frequency PCM frames to queue - adjust as
+   necessary per-target */
+#define VOICE_FRAMES            3
+
+/* Might have lookahead and be skipping samples, so size is needed */
+static size_t voicebuf_sizes[VOICE_FRAMES];
+static uint32_t (* voicebuf)[VOICE_PCM_FRAME_COUNT];
+static unsigned int cur_buf_in, cur_buf_out;
+
+/* A delay to not bring audio back to normal level too soon */
+#define QUIET_COUNT 3
+
 enum voice_thread_states
 {
     TSTATE_STOPPED = 0,   /* Voice thread is stopped and awaiting commands */
@@ -83,7 +111,6 @@
     Q_VOICE_NULL = 0, /* A message for thread sync - no effect on state */
     Q_VOICE_PLAY,     /* Play a clip */
     Q_VOICE_STOP,     /* Stop current clip */
-    Q_VOICE_STATE,    /* Query playing state */
 };
 
 /* Structure to store clip data callback info */
@@ -98,7 +125,7 @@
  * internal functions */
 struct voice_thread_data
 {
-    int state;              /* Thread state (TSTATE_*) */
+    volatile int state;     /* Thread state (TSTATE_*) */
     struct queue_event ev;  /* Last queue event pulled from queue */
     void *st;               /* Decoder instance */
     SpeexBits bits;         /* Bit cursor */
@@ -107,33 +134,79 @@
     const char *src[2];     /* Current output buffer pointers */
     int lookahead;          /* Number of samples to drop at start of clip */
     int count;              /* Count of samples remaining to send to PCM */
+    int quiet_counter;      /* Countdown until audio goes back to normal */
 };
 
-/* Audio playback is in a playing state? */
-static inline bool playback_is_playing(void)
+/* Number of frames in queue */
+static inline int voice_unplayed_frames(void)
 {
-    return (audio_status() & AUDIO_STATUS_PLAY) != 0;
+    return cur_buf_in - cur_buf_out;
+}
+
+/* Mixer channel callback - retire the finished frame and supply the next */
+static void voice_pcm_callback(unsigned char **start, size_t *size)
+{
+    /* The frame at cur_buf_out has just finished playing; count it before
+       looking for more or one stale, uncommitted frame gets sent out */
+    if (cur_buf_out == cur_buf_in || ++cur_buf_out == cur_buf_in)
+        return; /* Done! - *start and *size are left untouched */
+
+    *start = (unsigned char *)voicebuf[cur_buf_out % VOICE_FRAMES];
+    *size = voicebuf_sizes[cur_buf_out % VOICE_FRAMES];
+}
+
+/* Start the voice channel if stopped and committed frames are queued */
+static void voice_start_playback(void)
+{
+    if (mixer_channel_status(PCM_MIXER_CHAN_VOICE) != CHANNEL_STOPPED ||
+        voice_unplayed_frames() == 0)
+        return; /* Busy, or the slot at cur_buf_out holds only stale data */
+    unsigned int i = cur_buf_out % VOICE_FRAMES;
+    mixer_channel_play_data(PCM_MIXER_CHAN_VOICE, voice_pcm_callback,
+                            (unsigned char *)voicebuf[i], voicebuf_sizes[i]);
+}
+
+/* Stop the voice channel and discard any queued frames */
+static void voice_stop_playback(void)
+{
+    mixer_channel_stop(PCM_MIXER_CHAN_VOICE);
+    cur_buf_in = cur_buf_out = 0; /* Empty the queue after the stop call */
+}
+
+/* Return the next free PCM frame slot, or NULL while the queue is full */
+static uint32_t * voice_buf_get(void)
+{
+    if (voice_unplayed_frames() < VOICE_FRAMES)
+    {
+        /* Room left - hand out the slot at the write index */
+        return voicebuf[cur_buf_in % VOICE_FRAMES];
+    }
+    /* Full - make sure the channel is running so that it drains */
+    voice_start_playback();
+    return NULL;
+}
+
+/* Commit the frame from voice_buf_get: store its size, advance write index */
+static void voice_buf_commit(size_t size)
+{
+    voicebuf_sizes[cur_buf_in++ % VOICE_FRAMES] = size;
 }
 
 /* Stop any current clip and start playing a new one */
 void mp3_play_data(const unsigned char* start, int size,
                    pcm_play_callback_type get_more)
 {
-    /* Shared struct to get data to the thread - once it replies, it has
-     * safely cached it in its own private data */
-    static struct voice_info voice_clip SHAREDBSS_ATTR;
-
     if (get_more != NULL && start != NULL && (ssize_t)size > 0)
     {
-        mutex_lock(&voice_mutex);
+        struct voice_info voice_clip =
+        {
+            .get_more = get_more,
+            .start    = (unsigned char *)start,
+            .size     = size,
+        };
 
-        voice_clip.get_more = get_more;
-        voice_clip.start    = (unsigned char *)start;
-        voice_clip.size     = size;
         LOGFQUEUE("mp3 >| voice Q_VOICE_PLAY");
         queue_send(&voice_queue, Q_VOICE_PLAY, (intptr_t)&voice_clip);
-
-        mutex_unlock(&voice_mutex);
     }
 }
 
@@ -143,11 +216,8 @@
     if(!audio_is_thread_ready())
        return;
 
-    mutex_lock(&voice_mutex); /* Sync against voice_stop */
-    LOGFQUEUE("mp3 >| voice Q_VOICE_STOP: 1");
-    queue_send(&voice_queue, Q_VOICE_STOP, 1);
-
-    mutex_unlock(&voice_mutex);
+    LOGFQUEUE("mp3 >| voice Q_VOICE_STOP");
+    queue_send(&voice_queue, Q_VOICE_STOP, 0);
 }
 
 void mp3_play_pause(bool play)
@@ -156,36 +226,19 @@
     (void)play;
 }
 
-/* Tell is voice is still in a playing state */
+/* Tell if voice is still in a playing state */
 bool mp3_is_playing(void)
 {
-    /* TODO: Implement a timeout or state query function for event objects */
-    LOGFQUEUE("mp3 >| voice Q_VOICE_STATE");
-    int state = queue_send(&voice_queue, Q_VOICE_STATE, 0);
-    return state != TSTATE_STOPPED;
+    return !voice_done;
 }
 
 /* This function is meant to be used by the buffer request functions to
    ensure the codec is no longer active */
 void voice_stop(void)
 {
-    mutex_lock(&voice_mutex);
-
-    /* Stop the output and current clip */
-    mp3_play_stop();
-
-    /* Careful if using sync objects in talk.c - make sure locking order is
-     * observed with one or the other always granted first */
-
     /* Unqueue all future clips */
     talk_force_shutup();
-
-    /* Wait for any final queue_post to be processed */
-    LOGFQUEUE("mp3 >| voice Q_VOICE_NULL");
-    queue_send(&voice_queue, Q_VOICE_NULL, 0);
-
-    mutex_unlock(&voice_mutex);
-} /* voice_stop */
+}
 
 /* Wait for voice to finish speaking. */
 void voice_wait(void)
@@ -194,8 +247,7 @@
      * new clip by the time we wait. This should be resolvable if conditions
      * ever require knowing the very clip you requested has finished. */
 
-    /* Wait for PCM buffer to be exhausted. Works only if not playing. */
-    while(!voice_done || (!playback_is_playing() && pcm_is_playing()))
+    while (!voice_done)
         sleep(1);
 }
 
@@ -211,6 +263,9 @@
     dsp_configure(td->dsp, DSP_SET_FREQUENCY, VOICE_SAMPLE_RATE);
     dsp_configure(td->dsp, DSP_SET_SAMPLE_DEPTH, VOICE_SAMPLE_DEPTH);
     dsp_configure(td->dsp, DSP_SET_STEREO_MODE, STEREO_MONO);
+
+    mixer_channel_set_amplitude(PCM_MIXER_CHAN_VOICE, MIX_AMP_UNITY);
+    td->quiet_counter = 0;
 }
 
 /* Voice thread message processing */
@@ -222,7 +277,6 @@
         {
         case Q_VOICE_PLAY:
             LOGFQUEUE("voice < Q_VOICE_PLAY");
-            /* Put up a block for completion signal */
             voice_done = false;
 
             /* Copy the clip info */
@@ -239,12 +293,17 @@
                 /* Boost CPU now */
                 trigger_cpu_boost();
             }
-            else if (!playback_is_playing())
+            else
             {
-                /* Just voice, stop any clip still playing */
-                pcmbuf_play_stop();
+                /* Stop any clip still playing */
+                voice_stop_playback();
             }
 
+            /* Make audio play more softly and set delay to return to normal
+               playback level */
+            pcmbuf_soft_mode(true);
+            td->quiet_counter = QUIET_COUNT;
+
             /* Clean-start the decoder */
             td->st = speex_decoder_init(&speex_wb_mode);
 
@@ -255,51 +314,57 @@
             td->state = TSTATE_DECODE;
             return;
 
-        case Q_VOICE_STOP:
-            LOGFQUEUE("voice < Q_VOICE_STOP: %ld", (long)td->ev.data);
-
-            if (td->ev.data != 0 && !playback_is_playing())
+        case SYS_TIMEOUT:
+            if (voice_unplayed_frames())
             {
-                /* If not playing, it's just voice so stop pcm playback */
-                pcmbuf_play_stop();
+                /* Waiting for PCM to finish */
+                break;
             }
 
-            /* Cancel boost */
-            cancel_cpu_boost();
+            /* Drop through and stop the first time after clip runs out */
+            if (td->quiet_counter-- != QUIET_COUNT)
+            {
+                if (td->quiet_counter <= 0)
+                    pcmbuf_soft_mode(false);
+
+                break;
+            }
+
+            /* Fall-through */
+        case Q_VOICE_STOP:
+            LOGFQUEUE("voice < Q_VOICE_STOP");
 
             td->state = TSTATE_STOPPED;
             voice_done = true;
+
+            cancel_cpu_boost();
+            voice_stop_playback();
             break;
 
-        case Q_VOICE_STATE:
-            LOGFQUEUE("voice < Q_VOICE_STATE");
-            queue_reply(&voice_queue, td->state);
-
-            if (td->state == TSTATE_STOPPED)
-                break; /* Not in a playback state */
-
-            return;
-
         default:
             /* Default messages get a reply and thread continues with no
              * state transition */
             LOGFQUEUE("voice < default");
 
             if (td->state == TSTATE_STOPPED)
-                break;  /* Not in playback state */
+                break;  /* Not in (active) playback state */
 
             queue_reply(&voice_queue, 0);
             return;
         }
 
-        queue_wait(&voice_queue, &td->ev);
+        if (td->quiet_counter > 0)
+            queue_wait_w_tmo(&voice_queue, &td->ev, HZ/10);
+        else
+            queue_wait(&voice_queue, &td->ev);
     }
 }
 
 /* Voice thread entrypoint */
-static void voice_thread(void)
+static void NORETURN_ATTR voice_thread(void)
 {
     struct voice_thread_data td;
+    char *dest;
 
     voice_data_init(&td);
 
@@ -361,19 +426,10 @@
             }
 
             /* If all clips are done and not playing, force pcm playback. */
-            if (!pcm_is_playing())
-                pcmbuf_play_start();
+            voice_start_playback();
 
-            /* Synthesize a stop request */
-            /* NOTE: We have no way to know when the pcm data placed in the
-             * buffer is actually consumed and playback has reached the end
-             * so until the info is available or inferred somehow, this will
-             * not be accurate and the stopped signal will come too soon.
-             * ie. You may not hear the "Shutting Down" splash even though
-             * it waits for voice to stop. */
-            td.ev.id = Q_VOICE_STOP;
-            td.ev.data = 0; /* Let PCM drain by itself */
-            yield();
+            td.state = TSTATE_STOPPED;
+            td.ev.id = SYS_TIMEOUT;
             goto message_process;
         }
 
@@ -385,62 +441,39 @@
         td.src[1] = NULL;
         td.lookahead -= MIN(VOICE_FRAME_SIZE, td.lookahead);
 
-    buffer_insert:
-        /* Process the PCM samples in the DSP and send out for mixing */
+        if (td.count <= 0)
+            continue;
+
         td.state = TSTATE_BUFFER_INSERT;
 
-        while (td.count > 0)
+    buffer_insert:
+        /* Process the PCM samples in the DSP and send out for mixing */
+
+        while (1)
         {
-            int out_count = dsp_output_count(td.dsp, td.count);
-            int inp_count;
-            char *dest;
+            if (!queue_empty(&voice_queue))
+                goto message_wait;
 
-            while (1)
-            {
-                if (!queue_empty(&voice_queue))
-                    goto message_wait;
-
-                if ((dest = pcmbuf_request_voice_buffer(&out_count)) != NULL)
-                    break;
-
-                yield();
-            }
-
-            /* Get the real input_size for output_size bytes, guarding
-             * against resampling buffer overflows. */
-            inp_count = dsp_input_count(td.dsp, out_count);
-
-            if (inp_count <= 0)
+            if ((dest = (char *)voice_buf_get()) != NULL)
                 break;
 
-            /* Input size has grown, no error, just don't write more than
-             * length */
-            if (inp_count > td.count)
-                inp_count = td.count;
-
-            out_count = dsp_process(td.dsp, dest, td.src, inp_count);
-
-            if (out_count <= 0)
-                break;
-
-            pcmbuf_write_voice_complete(out_count);
-            td.count -= inp_count;
+            yield();
         }
 
-        yield();
+        voice_buf_commit(dsp_process(td.dsp, dest, td.src, td.count)
+                         * sizeof (int32_t));
     } /* end while */
-} /* voice_thread */
+}
 
 /* Initialize all synchronization objects create the thread */
 void voice_thread_init(void)
 {
     logf("Starting voice thread");
     queue_init(&voice_queue, false);
-    mutex_init(&voice_mutex);
 
     voice_thread_id = create_thread(voice_thread, voice_stack,
             sizeof(voice_stack), CREATE_THREAD_FROZEN,
-            voice_thread_name IF_PRIO(, PRIORITY_PLAYBACK) IF_COP(, CPU));
+            voice_thread_name IF_PRIO(, PRIORITY_VOICE) IF_COP(, CPU));
 
     queue_enable_queue_send(&voice_queue, &voice_queue_sender_list,
                             voice_thread_id);
@@ -457,6 +490,18 @@
 /* Set the voice thread priority */
 void voice_thread_set_priority(int priority)
 {
+    if (priority > PRIORITY_VOICE)
+        priority = PRIORITY_VOICE; /* Clamp requests above PRIORITY_VOICE */
+
     thread_set_priority(voice_thread_id, priority);
 }
 #endif
+
+/* Place the voice frame queue in the bytes just below bufend; return size */
+size_t voicebuf_init(unsigned char *bufend)
+{
+    const size_t size = VOICE_FRAMES * VOICE_PCM_FRAME_SIZE;
+    voicebuf = (uint32_t (*)[VOICE_PCM_FRAME_COUNT])(bufend - size);
+    cur_buf_in = cur_buf_out = 0;
+    return size;
+}
diff --git a/apps/voice_thread.h b/apps/voice_thread.h
index 4359825..1529f7e 100644
--- a/apps/voice_thread.h
+++ b/apps/voice_thread.h
@@ -29,8 +29,13 @@
 
 void voice_wait(void);
 void voice_stop(void);
+
 void voice_thread_init(void);
 void voice_thread_resume(void);
+#ifdef HAVE_PRIORITY_SCHEDULING
 void voice_thread_set_priority(int priority);
+#endif
+
+size_t voicebuf_init(unsigned char *bufend);
 
 #endif /* VOICE_THREAD_H */
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 7c3a909..85d9a28 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -311,6 +311,7 @@
 #ifndef BOOTLOADER
 pcm_sampr.c
 pcm.c
+pcm_mixer.c
 #ifdef HAVE_RECORDING
 enc_base.c
 #endif /* HAVE_RECORDING */
diff --git a/firmware/export/config.h b/firmware/export/config.h
index 2c7c6e8..70047ff 100644
--- a/firmware/export/config.h
+++ b/firmware/export/config.h
@@ -1048,4 +1048,9 @@
 #define HAVE_IO_PRIORITY
 #endif
 
+#if defined(CPU_COLDFIRE) || CONFIG_CPU == IMX31L
+/* Can record and play simultaneously - recording init keeps the mixer up */
+#define HAVE_PCM_FULL_DUPLEX
+#endif
+
 #endif /* __CONFIG_H__ */
diff --git a/firmware/export/pcm-internal.h b/firmware/export/pcm-internal.h
new file mode 100644
index 0000000..d69138f
--- /dev/null
+++ b/firmware/export/pcm-internal.h
@@ -0,0 +1,81 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 by Linus Nielsen Feltzing
+ * Copyright (C) 2011 by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef PCM_INTERNAL_H
+#define PCM_INTERNAL_H
+
+/** The following are for internal use between pcm.c and target-
+    specific portion **/
+
+/* Called by the bottom layer ISR when more data is needed. Returns non-
+ * zero size if more data is to be played. Setting start to NULL
+ * forces stop. */
+void pcm_play_get_more_callback(void **start, size_t *size);
+
+/* Invoked by the bottom layer ISR once the next transfer has begun so that
+   data for the following "get more" callback can be prepared ahead of time
+   (double-buffered callbacks). Inlined here so that drivers, apart from a
+   couple of ASM handlers, get this functionality with minimal overhead */
+static FORCE_INLINE void pcm_play_dma_started_callback(void)
+{
+    extern void (* pcm_play_dma_started)(void);
+    void (* const started_hook)(void) = pcm_play_dma_started; /* Read once */
+    if (started_hook)
+        started_hook();
+}
+
+extern unsigned long pcm_curr_sampr;
+extern unsigned long pcm_sampr;
+extern int pcm_fsel;
+
+#ifdef HAVE_PCM_DMA_ADDRESS
+void * pcm_dma_addr(void *addr);
+#endif
+
+extern volatile bool pcm_playing;
+extern volatile bool pcm_paused;
+
+void pcm_play_dma_lock(void);
+void pcm_play_dma_unlock(void);
+void pcm_play_dma_init(void) INIT_ATTR;
+void pcm_play_dma_start(const void *addr, size_t size);
+void pcm_play_dma_stop(void);
+void pcm_play_dma_pause(bool pause);
+const void * pcm_play_dma_get_peak_buffer(int *count);
+
+void pcm_dma_apply_settings(void);
+
+#ifdef HAVE_RECORDING
+
+/* DMA transfer in is currently active */
+extern volatile bool pcm_recording;
+
+/* APIs implemented in the target-specific portion */
+void pcm_rec_dma_init(void);
+void pcm_rec_dma_close(void);
+void pcm_rec_dma_start(void *addr, size_t size);
+void pcm_rec_dma_record_more(void *start, size_t size);
+void pcm_rec_dma_stop(void);
+const void * pcm_rec_dma_get_peak_buffer(void);
+
+#endif /* HAVE_RECORDING */
+
+#endif /* PCM_INTERNAL_H */
diff --git a/firmware/export/pcm.h b/firmware/export/pcm.h
index 80b5b09..22c5ef3 100644
--- a/firmware/export/pcm.h
+++ b/firmware/export/pcm.h
@@ -49,7 +49,7 @@
 
 /** RAW PCM routines used with playback and recording **/
 
-/* Typedef for registered callback */
+/* Typedef for registered callbacks */
 typedef void (*pcm_play_callback_type)(unsigned char **start,
                                        size_t *size);
 typedef void (*pcm_rec_callback_type)(int status, void **start, size_t *size);
@@ -90,34 +90,7 @@
 bool pcm_is_paused(void);
 bool pcm_is_playing(void);
 
-/** The following are for internal use between pcm.c and target-
-    specific portion **/
-
-/* Called by the bottom layer ISR when more data is needed. Returns non-
- * zero size if more data is to be played. Setting start to NULL
- * forces stop. */
-void pcm_play_get_more_callback(void **start, size_t *size);
-
-extern unsigned long pcm_curr_sampr;
-extern unsigned long pcm_sampr;
-extern int pcm_fsel;
-
-#ifdef HAVE_PCM_DMA_ADDRESS
-void * pcm_dma_addr(void *addr);
-#endif
-
-extern volatile bool pcm_playing;
-extern volatile bool pcm_paused;
-
-void pcm_play_dma_lock(void);
-void pcm_play_dma_unlock(void);
-void pcm_play_dma_init(void) INIT_ATTR;
-void pcm_play_dma_start(const void *addr, size_t size);
-void pcm_play_dma_stop(void);
-void pcm_play_dma_pause(bool pause);
-const void * pcm_play_dma_get_peak_buffer(int *count);
-
-void pcm_dma_apply_settings(void);
+void pcm_play_set_dma_started_callback(void (* callback)(void));
 
 #ifdef HAVE_RECORDING
 
@@ -148,19 +121,6 @@
 
 void pcm_calculate_rec_peaks(int *left, int *right);
 
-/** The following are for internal use between pcm.c and target-
-    specific portion **/
-/* DMA transfer in is currently active */
-extern volatile bool pcm_recording;
-
-/* APIs implemented in the target-specific portion */
-void pcm_rec_dma_init(void);
-void pcm_rec_dma_close(void);
-void pcm_rec_dma_start(void *addr, size_t size);
-void pcm_rec_dma_record_more(void *start, size_t size);
-void pcm_rec_dma_stop(void);
-const void * pcm_rec_dma_get_peak_buffer(void);
-
 #endif /* HAVE_RECORDING */
 
 #endif /* PCM_PLAYBACK_H */
diff --git a/firmware/export/pcm_mixer.h b/firmware/export/pcm_mixer.h
new file mode 100644
index 0000000..3b420e1
--- /dev/null
+++ b/firmware/export/pcm_mixer.h
@@ -0,0 +1,102 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2011 by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#ifndef PCM_MIXER_H
+#define PCM_MIXER_H
+
+/** Simple config **/
+
+/* Length of PCM frames (always) */
+#if CONFIG_CPU == PP5002
+/* There's far less time to do mixing because HW FIFOs are short */
+#define MIX_FRAME_SAMPLES 64
+#else
+/* Assume HW DMA engine is available or sufficient latency exists in the
+   PCM pathway */
+#define MIX_FRAME_SAMPLES 256
+#endif
+
+#if defined(CPU_COLDFIRE) || defined(CPU_PP)
+/* For Coldfire, it's just faster
+   For PortalPlayer, this also avoids more expensive cache coherency */
+#define DOWNMIX_BUF_IBSS    IBSS_ATTR
+#else
+/* Otherwise can't DMA from IRAM, IRAM is pointless or worse */
+#define DOWNMIX_BUF_IBSS
+#endif
+
+
+/** Definitions **/
+
+/* Channels are preassigned for simplicity; values index the channel table */
+enum pcm_mixer_channel
+{
+    PCM_MIXER_CHAN_PLAYBACK = 0,
+    PCM_MIXER_CHAN_VOICE,       /* Fed by the voice thread */
+#ifndef HAVE_HARDWARE_BEEP
+    PCM_MIXER_CHAN_BEEP,        /* Only exists without a hardware beep */
+#endif
+    /* Add new channel indexes above this line */
+    PCM_MIXER_NUM_CHANNELS,     /* Count of channels, not a channel itself */
+};
+
+/* Channel playback states, as reported by mixer_channel_status() */
+enum channel_status
+{
+    CHANNEL_STOPPED = 0,
+    CHANNEL_PLAYING,
+    CHANNEL_PAUSED,
+};
+
+#define MIX_AMP_UNITY    0x00010000
+#define MIX_AMP_MUTE     0x00000000
+
+
+/** Public interfaces **/
+
+/* Start playback on a channel */
+void mixer_channel_play_data(enum pcm_mixer_channel channel,
+                             pcm_play_callback_type get_more,
+                             unsigned char *start, size_t size);
+
+/* Pause or resume a channel (when started) */
+void mixer_channel_play_pause(enum pcm_mixer_channel channel, bool play);
+
+/* Stop playback on a channel */
+void mixer_channel_stop(enum pcm_mixer_channel channel);
+
+/* Set channel's amplitude factor */
+void mixer_channel_set_amplitude(enum pcm_mixer_channel channel,
+                                 unsigned int amplitude);
+
+/* Return channel's playback status */
+enum channel_status mixer_channel_status(enum pcm_mixer_channel channel);
+
+/* Returns amount of data remaining in channel before next callback */
+size_t mixer_channel_get_bytes_waiting(enum pcm_mixer_channel channel);
+
+/* Return pointer to channel's playing audio data and the size remaining */
+void * mixer_channel_get_buffer(enum pcm_mixer_channel channel, int *count);
+
+/* Stop ALL channels and PCM and reset state */
+void mixer_reset(void);
+
+#endif /* PCM_MIXER_H */
diff --git a/firmware/pcm.c b/firmware/pcm.c
index d15c129..b7415f3 100644
--- a/firmware/pcm.c
+++ b/firmware/pcm.c
@@ -28,6 +28,8 @@
 #include "audio.h"
 #include "sound.h"
 #include "general.h"
+#include "pcm-internal.h"
+#include "pcm_mixer.h"
 
 /**
  * Aspects implemented in the target-specific portion:
@@ -78,8 +80,8 @@
  */
 
 /* the registered callback function to ask for more mp3 data */
-static volatile pcm_play_callback_type pcm_callback_for_more
-    SHAREDBSS_ATTR = NULL;
+static pcm_play_callback_type pcm_callback_for_more SHAREDBSS_ATTR = NULL;
+void (* pcm_play_dma_started)(void) SHAREDBSS_ATTR = NULL;
 /* PCM playback state */
 volatile bool pcm_playing SHAREDBSS_ATTR = false;
 /* PCM paused state. paused implies playing */
@@ -95,6 +97,7 @@
 static void pcm_play_stopped(void)
 {
     pcm_callback_for_more = NULL;
+    pcm_play_dma_started = NULL;
     pcm_paused = false;
     pcm_playing = false;
 }
@@ -404,6 +407,12 @@
     }
 }
 
+/* Register the hook that is run after a DMA transfer has been started */
+void pcm_play_set_dma_started_callback(void (* callback)(void))
+{
+    pcm_play_dma_started = callback;
+}
+
 #ifdef HAVE_RECORDING
 /** Low level pcm recording apis **/
 
@@ -475,6 +484,11 @@
 {
     logf("pcm_init_recording");
 
+#ifndef HAVE_PCM_FULL_DUPLEX
+    /* Stop the beasty before attempting recording */
+    mixer_reset();
+#endif
+
     /* Recording init is locked unlike general pcm init since this is not
      * just a one-time event at startup and it should and must be safe by
      * now. */
diff --git a/firmware/pcm_mixer.c b/firmware/pcm_mixer.c
new file mode 100644
index 0000000..cddd3f0
--- /dev/null
+++ b/firmware/pcm_mixer.c
@@ -0,0 +1,501 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2011 by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "general.h"
+#include "kernel.h"
+#include "pcm.h"
+#include "pcm_mixer.h"
+#include "dsp.h"
+
+/* Channels use standard-style PCM callback interface but a latency of one
+   frame by double-buffering is introduced in order to facilitate mixing and
+   keep the hardware fed. There must be sufficient time to perform operations
+   before the last samples are sent to the codec and so things are done in
+   parallel (as much as possible) with sending-out data. */
+
+/* Define this to nonzero to add a marker pulse at each frame start */
+#define FRAME_BOUNDARY_MARKERS 0
+
+/* Descriptor for each channel */
+struct mixer_channel
+{
+    unsigned char *start;            /* Current read position in buffer */
+    size_t size;                     /* Bytes remaining from start */
+    size_t last_size;                /* Bytes mixed in the previous cycle */
+    pcm_play_callback_type get_more; /* Called for more data at size == 0 */
+    enum channel_status status;      /* Playback status */
+    uint32_t amplitude;              /* Amp. factor: 0x0000 = mute, 0x10000 = unity */
+};
+
+/* Forget about boost here for the moment */
+#define MIX_FRAME_SIZE      (MIX_FRAME_SAMPLES*4)
+
+/* Because of the double-buffering, playback is always from here, otherwise a
+   mechanism for the channel callbacks not to free buffers too early would be
+   needed (if we _really_ want it and it's worth it, we _can_ do that ;-) ) */
+static uint32_t downmix_buf[2][MIX_FRAME_SAMPLES] DOWNMIX_BUF_IBSS MEM_ALIGN_ATTR;
+static int downmix_index = 0;   /* Which downmix_buf? */
+static size_t next_size = 0;    /* Size of buffer to play next time */
+
+/* Descriptors for all available channels */
+static struct mixer_channel channels[PCM_MIXER_NUM_CHANNELS] IBSS_ATTR;
+
+/* Packed pointer array of all playing (active) channels in "channels" array */
+static struct mixer_channel * active_channels[PCM_MIXER_NUM_CHANNELS+1] IBSS_ATTR;
+
+/* Number of silence frames to play after all data has played */
+#define MAX_IDLE_FRAMES     (NATIVE_FREQUENCY*3 / MIX_FRAME_SAMPLES)
+static unsigned int idle_counter = 0;
+
+/* Cheapo align to the 4-byte 16+16 bit PCM frame: start up, size down */
+#define ALIGN_CHANNEL(start, size) \
+    ({ start = (void *)(((uintptr_t)start + 3) & ~3); \
+       size &= ~3; })
+
+/* Include any implemented CPU-optimized mixdown routines */
+#if defined(CPU_ARM)
+#if ARM_ARCH >= 6
+#include "pcm-mixer-armv6.c"
+#elif ARM_ARCH >= 5
+#include "pcm-mixer-armv5.c"
+#else
+#include "pcm-mixer-armv4.c"
+#endif /* ARM_ARCH */
+#elif defined (CPU_COLDFIRE)
+#include "pcm-mixer-coldfire.c"
+#endif /* CPU_* */
+
+
+/** Generic mixing routines **/
+
+#ifndef MIXER_OPTIMIZED_MIX_SAMPLES
+/* Saturate a sample to the signed 16 bit range */
+static FORCE_INLINE int32_t clip_sample_16(int32_t sample)
+{
+    if ((int16_t)sample == sample)
+        return sample; /* Already fits */
+    return 0x7fff ^ (sample >> 31); /* Limit chosen by the sign bits */
+}
+
+/* Mix two sources' 16-bit sample pairs into out, applying 16.16 gains */
+static FORCE_INLINE void mix_samples(uint32_t *out,
+                                     int16_t *src0,
+                                     int32_t src0_amp,
+                                     int16_t *src1,
+                                     int32_t src1_amp,
+                                     size_t size)
+{
+    if (src0_amp == MIX_AMP_UNITY && src1_amp == MIX_AMP_UNITY)
+    {
+        /* Both are unity amplitude - plain sum, clipped to 16 bits */
+        do
+        {
+            int32_t l = *src0++ + *src1++;
+            int32_t h = *src0++ + *src1++;
+            *out++ = (uint16_t)clip_sample_16(l) | (clip_sample_16(h) << 16);
+        }
+        while ((size -= 4) > 0); /* size: bytes, nonzero multiple of 4 */
+    }
+    else if (src0_amp != MIX_AMP_UNITY && src1_amp != MIX_AMP_UNITY)
+    {
+        /* Neither is unity amplitude - scale both before summing */
+        do
+        {
+            int32_t l = (*src0++ * src0_amp >> 16) + (*src1++ * src1_amp >> 16);
+            int32_t h = (*src0++ * src0_amp >> 16) + (*src1++ * src1_amp >> 16);
+            *out++ = (uint16_t)clip_sample_16(l) | (clip_sample_16(h) << 16);
+        }
+        while ((size -= 4) > 0);
+    }
+    else
+    {
+        /* Exactly one is unity amplitude */
+        if (src0_amp != MIX_AMP_UNITY)
+        {
+            /* Swap so the unity source is src0 and only src1 is scaled */
+            int16_t *src_tmp = src0;
+            src0 = src1;
+            src1 = src_tmp;
+            src1_amp = src0_amp;
+            src0_amp = MIX_AMP_UNITY;
+        }
+
+        do
+        {
+            int32_t l = *src0++ + (*src1++ * src1_amp >> 16);
+            int32_t h = *src0++ + (*src1++ * src1_amp >> 16);
+            *out++ = (uint16_t)clip_sample_16(l) | (clip_sample_16(h) << 16);
+        }
+        while ((size -= 4) > 0);
+    }
+}
+#endif /* MIXER_OPTIMIZED_MIX_SAMPLES */
+
+#ifndef MIXER_OPTIMIZED_WRITE_SAMPLES
+/* Copy one channel's samples to out, applying its 16.16 gain factor */
+static FORCE_INLINE void write_samples(uint32_t *out,
+                                       int16_t *src,
+                                       int32_t amp,
+                                       size_t size)
+{
+    if (LIKELY(amp == MIX_AMP_UNITY))
+    {
+        /* Channel is unity amplitude - size bytes are copied unchanged */
+        memcpy(out, src, size);
+    }
+    else
+    {
+        /* Channel needs amplitude cut */
+        do
+        {
+            int32_t l = *src++ * amp >> 16;
+            int32_t h = *src++ * amp & 0xffff0000; /* Kept in the top half */
+            *out++ = (uint16_t)l | h;
+        }
+        while ((size -= 4) > 0); /* size: bytes, nonzero multiple of 4 */
+    }     
+}
+#endif /* MIXER_OPTIMIZED_WRITE_SAMPLES */
+
+
+/** Private generic routines **/
+
+/* Add channel to the active list so its data gets mixed */
+static void mixer_activate_channel(struct mixer_channel *chan)
+{
+    void **slot = find_array_ptr((void **)active_channels, chan);
+
+    if (*slot)
+        return; /* Slot is occupied - presumably chan is already listed */
+
+    idle_counter = 0;
+    *slot = chan;
+}
+
+/* Take channel out of active_channels so it is no longer mixed */
+static void mixer_deactivate_channel(struct mixer_channel *chan)
+{
+    remove_array_ptr((void **)active_channels, chan);
+}
+
+/* Deactivate channel, drop its buffer and change it to stopped state */
+static void channel_stopped(struct mixer_channel *chan)
+{
+    mixer_deactivate_channel(chan); /* Off the active list first */
+    chan->size = 0;
+    chan->start = NULL;
+    chan->status = CHANNEL_STOPPED;
+}
+
+/* Main PCM callback - sends the current prepared frame to play */
+static void mixer_pcm_callback(unsigned char **start, size_t *size)
+{
+    *start = (unsigned char *)downmix_buf[downmix_index];
+    *size = next_size; /* Written by mixer_buffer_callback() */
+}
+
+/* Buffering callback - advances every active channel, refills drained ones
+   via get_more and mixes the next frame for mixer_pcm_callback() */
+static void ICODE_ATTR mixer_buffer_callback(void)
+{
+    downmix_index ^= 1; /* Next buffer */
+
+    void *mixptr = downmix_buf[downmix_index];
+    size_t mixsize = MIX_FRAME_SIZE;
+    struct mixer_channel **chan_p;
+
+    next_size = 0; /* bytes of this frame filled so far */
+
+    /* "Loop" back here if one round wasn't enough to fill a frame */
+fill_frame:
+    chan_p = active_channels;
+
+    while (*chan_p)
+    {
+        /* Find the active channel with the least data remaining and call any
+           callbacks for channels that ran out - stopping whichever report
+           "no more" */
+        struct mixer_channel *chan = *chan_p;
+        chan->start += chan->last_size; /* consume what was mixed last round */
+        chan->size -= chan->last_size;
+
+        if (chan->size == 0)
+        {
+            if (chan->get_more)
+            {
+                chan->get_more(&chan->start, &chan->size);
+                ALIGN_CHANNEL(chan->start, chan->size);
+            }
+
+            if (!(chan->start && chan->size))
+            {
+                /* Channel is stopping */
+                channel_stopped(chan);
+                continue; /* no chan_p++: presumably the list closed up here */
+            }
+        }
+
+        /* Channel will play for at least part of this frame */
+
+        /* Channel with least amount of data remaining determines the downmix
+           size */
+        if (chan->size < mixsize)
+            mixsize = chan->size;
+
+        chan_p++;
+    }
+
+    /* Add all still-active channels to the downmix */
+    chan_p = active_channels;
+
+    if (LIKELY(*chan_p))
+    {
+        struct mixer_channel *chan = *chan_p++;
+
+        if (LIKELY(!*chan_p)) /* sole active channel: copy/scale only */
+        {
+            write_samples(mixptr, (void *)chan->start,
+                          chan->amplitude, mixsize);
+        }
+        else
+        {
+            void *src0, *src1;
+            unsigned int amp0, amp1;
+
+            /* Mix first two channels with each other as the downmix */
+            src0 = chan->start;
+            amp0 = chan->amplitude;
+            chan->last_size = mixsize;
+
+            chan = *chan_p++;
+            src1 = chan->start;
+            amp1 = chan->amplitude;
+
+            while (1)
+            {
+                mix_samples(mixptr, src0, amp0, src1, amp1, mixsize);
+
+                if (!*chan_p)
+                    break;
+
+                /* More channels to mix - mix each with existing downmix */
+                chan->last_size = mixsize;
+                chan = *chan_p++;
+                src0 = mixptr; /* accumulate in place at unity gain */
+                amp0 = MIX_AMP_UNITY;
+                src1 = chan->start;
+                amp1 = chan->amplitude;
+            }
+        }
+
+        chan->last_size = mixsize; /* last (or only) channel mixed */
+        next_size += mixsize;
+
+        if (next_size < MIX_FRAME_SIZE)
+        {
+            /* There is still space remaining in this frame */
+            mixptr += mixsize; /* byte-wise void * arithmetic (GNU C) */
+            mixsize = MIX_FRAME_SIZE - next_size;
+            goto fill_frame;
+        }
+    }
+    else if (idle_counter++ < MAX_IDLE_FRAMES) /* nothing active (any more) */
+    {
+        /* Pad incomplete frames with silence */
+        if (idle_counter <= 3)
+            memset(mixptr, 0, MIX_FRAME_SIZE - next_size);
+        /* 3: presumably one partial pad, then one full clear per buffer */
+        next_size = MIX_FRAME_SIZE;
+    }
+    /* else silence period ran out - go to sleep */
+
+#if FRAME_BOUNDARY_MARKERS != 0
+    if (next_size) /* overwrite the frame's first word with a per-buffer tag */
+        *downmix_buf[downmix_index] = downmix_index ? 0x7fff7fff : 0x80008000;
+#endif
+}
+
+/* Start PCM driver if it's not currently playing (or busy recording) */
+static void mixer_start_pcm(void)
+{
+    if (pcm_is_playing())
+        return;
+
+#if defined(HAVE_RECORDING) && !defined(HAVE_PCM_FULL_DUPLEX)
+    if (pcm_is_recording())
+        return; /* no full duplex: leave PCM to the recording */
+#endif
+
+    /* Prepare initial frames and set up the double buffer */
+    mixer_buffer_callback();
+
+    /* Save the previous call's output - it is the frame played first */
+    void *start = downmix_buf[downmix_index];
+
+    mixer_buffer_callback(); /* 2nd frame: what mixer_pcm_callback() returns */
+
+    pcm_play_set_dma_started_callback(mixer_buffer_callback);
+    pcm_play_data(mixer_pcm_callback, start, MIX_FRAME_SIZE);
+}
+
+/* Init channel and start it if it has data; called and returns PCM-locked */
+static void mixer_channel_play_start(struct mixer_channel *chan,
+                                     pcm_play_callback_type get_more,
+                                     unsigned char *start, size_t size)
+{
+    pcm_play_unlock(); /* Allow playback while doing any callback */
+
+    ALIGN_CHANNEL(start, size);
+
+    if (!(start && size))
+    {
+        /* Initial buffer not passed - call the callback now */
+        size = 0;
+        if (get_more)
+        {
+            get_more(&start, &size);
+            ALIGN_CHANNEL(start, size);
+        }
+    }
+
+    if (start && size)
+    {
+        /* We have data - start the channel */
+        chan->status = CHANNEL_PLAYING;
+        chan->start = start;
+        chan->size = size;
+        chan->last_size = 0; /* nothing consumed from this buffer yet */
+        chan->get_more = get_more;
+
+        pcm_play_lock(); /* re-take the lock dropped on entry */
+        mixer_activate_channel(chan);
+        mixer_start_pcm();
+    }
+    else
+    {
+        /* Never had anything - stop it now */
+        pcm_play_lock(); /* re-take the lock dropped on entry */
+        channel_stopped(chan);
+    }
+}
+
+
+/** Public interfaces **/
+
+/* Start playback on a channel; it is pulled from the mix first if active */
+void mixer_channel_play_data(enum pcm_mixer_channel channel,
+                             pcm_play_callback_type get_more, /* may be NULL */
+                             unsigned char *start, size_t size) /* or NULL/0 */
+{
+    struct mixer_channel *chan = &channels[channel];
+
+    pcm_play_lock();
+    mixer_deactivate_channel(chan);
+    mixer_channel_play_start(chan, get_more, start, size);
+    pcm_play_unlock();
+}
+
+/* Pause (play == false) or resume (play == true) a channel (when started) */
+void mixer_channel_play_pause(enum pcm_mixer_channel channel, bool play)
+{
+    struct mixer_channel *chan = &channels[channel];
+
+    pcm_play_lock();
+    /* Act only on a real change: resume if paused, pause if playing */
+    if (play == (chan->status == CHANNEL_PAUSED) &&
+        chan->status != CHANNEL_STOPPED)
+    {
+        if (play)
+        {
+            chan->status = CHANNEL_PLAYING;
+            mixer_activate_channel(chan);
+            mixer_start_pcm(); /* returns at once if PCM already plays */
+        }
+        else
+        {
+            mixer_deactivate_channel(chan); /* start/size are kept */
+            chan->status = CHANNEL_PAUSED;
+        }
+    }
+
+    pcm_play_unlock();
+}
+
+/* Stop playback on a channel (under the PCM lock) and drop its buffer */
+void mixer_channel_stop(enum pcm_mixer_channel channel)
+{
+    struct mixer_channel *chan = &channels[channel];
+
+    pcm_play_lock();
+    channel_stopped(chan);
+    pcm_play_unlock();
+}
+
+/* Set channel's amplitude factor, clamped to at most MIX_AMP_UNITY */
+void mixer_channel_set_amplitude(enum pcm_mixer_channel channel,
+                                 unsigned int amplitude)
+{
+    channels[channel].amplitude = MIN(amplitude, MIX_AMP_UNITY); /* no lock */
+}
+
+/* Return channel's playback status (plain read, PCM lock not taken) */
+enum channel_status mixer_channel_status(enum pcm_mixer_channel channel)
+{
+    return channels[channel].status;
+}
+
+/* Returns amount of data remaining in channel before next get_more call */
+size_t mixer_channel_get_bytes_waiting(enum pcm_mixer_channel channel)
+{
+    return channels[channel].size; /* plain read, PCM lock not taken */
+}
+
+/* Return channel's current data pointer; *count = size left in 4-byte units */
+void * mixer_channel_get_buffer(enum pcm_mixer_channel channel, int *count)
+{
+    struct mixer_channel *chan = &channels[channel];
+    void * buf = *(unsigned char * volatile *)&chan->start;
+    size_t size = *(size_t volatile *)&chan->size;
+    void * buf2 = *(unsigned char * volatile *)&chan->start;
+
+    /* start was read on both sides of size (no lock). Still same buffer? */
+    if (buf == buf2)
+    {
+        *count = size >> 2; /* bytes / 4; presumably stereo 16-bit frames */
+        return buf;
+    }
+    /* else can't be sure buf and size are related */
+
+    *count = 0;
+    return NULL;
+}
+
+/* Stop PCM, stop every channel listed in active_channels and reset state */
+void mixer_reset(void)
+{
+    pcm_play_stop();
+    /* NOTE(review): paused channels are not in the list and stay paused */
+    while (*active_channels)
+        channel_stopped(*active_channels); /* unlinks the head each pass */
+
+    idle_counter = 0;
+}
diff --git a/firmware/target/arm/as3525/pcm-as3525.c b/firmware/target/arm/as3525/pcm-as3525.c
index 469833b..1b22d48 100644
--- a/firmware/target/arm/as3525/pcm-as3525.c
+++ b/firmware/target/arm/as3525/pcm-as3525.c
@@ -29,6 +29,7 @@
 #include "as3514.h"
 #include "audiohw.h"
 #include "mmu-arm.h"
+#include "pcm-internal.h"
 
 #define MAX_TRANSFER (4*((1<<11)-1)) /* maximum data we can transfer via DMA
                                       * i.e. 32 bits at once (size of I2SO_DATA)
@@ -104,9 +105,13 @@
 
         /* force writeback */
         clean_dcache_range(dma_start_addr, dma_start_size);
+        play_start_pcm();
+        pcm_play_dma_started_callback();
     }
-
-    play_start_pcm();
+    else
+    {
+        play_start_pcm();
+    }
 }
 
 void pcm_play_dma_start(const void *addr, size_t size)
diff --git a/firmware/target/arm/imx31/gigabeat-s/pcm-gigabeat-s.c b/firmware/target/arm/imx31/gigabeat-s/pcm-gigabeat-s.c
index c8c1283..1f6eef4 100644
--- a/firmware/target/arm/imx31/gigabeat-s/pcm-gigabeat-s.c
+++ b/firmware/target/arm/imx31/gigabeat-s/pcm-gigabeat-s.c
@@ -26,6 +26,7 @@
 #include "ccm-imx31.h"
 #include "sdma-imx31.h"
 #include "mmu-imx31.h"
+#include "pcm-internal.h"
 
 #define DMA_PLAY_CH_NUM 2
 #define DMA_REC_CH_NUM 1
@@ -105,6 +106,8 @@
     dma_play_bd.mode.command = TRANSFER_16BIT;
     dma_play_bd.mode.status = BD_DONE | BD_WRAP | BD_INTR;
     sdma_channel_run(DMA_PLAY_CH_NUM);
+
+    pcm_play_dma_started_callback();
 }
 
 void pcm_play_lock(void)
diff --git a/firmware/target/arm/pcm-mixer-armv4.c b/firmware/target/arm/pcm-mixer-armv4.c
new file mode 100644
index 0000000..4818544
--- /dev/null
+++ b/firmware/target/arm/pcm-mixer-armv4.c
@@ -0,0 +1,182 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2011 by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#define MIXER_OPTIMIZED_WRITE_SAMPLES
+#define MIXER_OPTIMIZED_MIX_SAMPLES
+
+/* Mix two sources into out: sum of (sample * amp) >> 16, clamped to 16 bits */
+static FORCE_INLINE void mix_samples(void *out,
+                                     void *src0,
+                                     int32_t src0_amp,
+                                     void *src1,
+                                     int32_t src1_amp,
+                                     size_t size) /* bytes; 4 per pass, >= 1 pass */
+{
+    if (src0_amp == MIX_AMP_UNITY && src1_amp == MIX_AMP_UNITY)
+    {
+        /* Both are unity amplitude - no multiplies needed */
+        int32_t l0, l1, h0, h1;
+        asm volatile (
+        "1:                             \n"
+            "ldrsh  %4, [%1], #2        \n" /* l0 = 1st sample of src0 */
+            "ldrsh  %5, [%2], #2        \n" /* l1 = 1st sample of src1 */
+            "ldrsh  %6, [%1], #2        \n" /* h0 = 2nd sample of src0 */
+            "ldrsh  %7, [%2], #2        \n" /* h1 = 2nd sample of src1 */
+            "add    %4, %4, %5          \n" /* l0 += l1 */
+            "add    %6, %6, %7          \n" /* h0 += h1 */
+            "mov    %5, %4, asr #15     \n" /* clamp l0: are bits 31..15 */
+            "teq    %5, %5, asr #31     \n" /* ...all the same? */
+            "eorne  %4, %8, %4, asr #31 \n" /* no: low half = 7fff/8000 */
+            "mov    %7, %6, asr #15     \n" /* same clamp for h0 */
+            "teq    %7, %7, asr #31     \n"
+            "eorne  %6, %8, %6, asr #31 \n"
+            "subs   %3, %3, #4          \n" /* size -= 4 */
+            "and    %4, %4, %8, lsr #16 \n" /* keep low halfword of l0 */
+            "orr    %6, %4, %6, lsl #16 \n" /* h0 into the high halfword */
+            "str    %6, [%0], #4        \n" /* store the packed pair */
+            "bhi    1b                  \n" /* loop while size > 0 */
+            : "+r"(out), "+r"(src0), "+r"(src1), "+r"(size),
+              "=&r"(l0), "=&r"(l1), "=&r"(h0), "=&r"(h1)
+            : "r"(0xffff7fff)); /* NOTE(review): no "cc"/"memory" clobbers */
+    }
+    else if (src0_amp != MIX_AMP_UNITY && src1_amp != MIX_AMP_UNITY)
+    {
+        /* Neither are unity amplitude - scale all four samples */
+        int32_t l0, l1, h0, h1;
+        asm volatile (
+        "1:                              \n"
+            "ldrsh  %4, [%1], #2         \n" /* l0, l1, h0, h1 as above */
+            "ldrsh  %5, [%2], #2         \n"
+            "ldrsh  %6, [%1], #2         \n"
+            "ldrsh  %7, [%2], #2         \n"
+            "mul    %4, %8, %4           \n" /* l0 *= src0_amp */
+            "mul    %5, %9, %5           \n" /* l1 *= src1_amp */
+            "mul    %6, %8, %6           \n" /* h0 *= src0_amp */
+            "mul    %7, %9, %7           \n" /* h1 *= src1_amp */
+            "mov    %4, %4, asr #16      \n" /* l0 = (l0>>16) + (l1>>16) */
+            "add    %4, %4, %5, asr #16  \n"
+            "mov    %6, %6, asr #16      \n" /* h0 = (h0>>16) + (h1>>16) */
+            "add    %6, %6, %7, asr #16  \n"
+            "mov    %5, %4, asr #15      \n" /* clamp l0 to 16 bits */
+            "teq    %5, %5, asr #31      \n"
+            "eorne  %4, %10, %4, asr #31 \n"
+            "mov    %7, %6, asr #15      \n" /* clamp h0 to 16 bits */
+            "teq    %7, %7, asr #31      \n"
+            "eorne  %6, %10, %6, asr #31 \n"
+            "subs   %3, %3, #4           \n"
+            "and    %4, %4, %10, lsr #16 \n" /* pack l0 | h0 << 16 */
+            "orr    %6, %4, %6, lsl #16  \n"
+            "str    %6, [%0], #4         \n"
+            "bhi    1b                   \n"
+            : "+r"(out), "+r"(src0), "+r"(src1), "+r"(size),
+              "=&r"(l0), "=&r"(l1), "=&r"(h0), "=&r"(h1)
+            : "r"(src0_amp), "r"(src1_amp), "r"(0xffff7fff));
+    }
+    else
+    {
+        /* One is unity amplitude */
+        if (src0_amp != MIX_AMP_UNITY)
+        {
+            /* Keep unity in src0, amp0 - swap the sources and their amps */
+            int16_t *src_tmp = src0;
+            src0 = src1;
+            src1 = src_tmp;
+            src1_amp = src0_amp;
+            src0_amp = MIX_AMP_UNITY;
+        }
+
+        int32_t l0, l1, h0, h1;
+        asm volatile (
+        "1:                             \n"
+            "ldrsh  %4, [%1], #2        \n" /* l0, l1, h0, h1 as above */
+            "ldrsh  %5, [%2], #2        \n"
+            "ldrsh  %6, [%1], #2        \n"
+            "ldrsh  %7, [%2], #2        \n"
+            "mul    %5, %8, %5          \n" /* l1 *= src1_amp */
+            "mul    %7, %8, %7          \n" /* h1 *= src1_amp */
+            "add    %4, %4, %5, asr #16 \n" /* l0 += l1 >> 16 */
+            "add    %6, %6, %7, asr #16 \n" /* h0 += h1 >> 16 */
+            "mov    %5, %4, asr #15     \n" /* clamp l0 to 16 bits */
+            "teq    %5, %5, asr #31     \n"
+            "eorne  %4, %9, %4, asr #31 \n"
+            "mov    %7, %6, asr #15     \n" /* clamp h0 to 16 bits */
+            "teq    %7, %7, asr #31     \n"
+            "eorne  %6, %9, %6, asr #31 \n"
+            "subs   %3, %3, #4          \n"
+            "and    %4, %4, %9, lsr #16 \n" /* pack l0 | h0 << 16 */
+            "orr    %6, %4, %6, lsl #16 \n"
+            "str    %6, [%0], #4        \n"
+            "bhi    1b                  \n"
+            : "+r"(out), "+r"(src0), "+r"(src1), "+r"(size),
+              "=&r"(l0), "=&r"(l1), "=&r"(h0), "=&r"(h1)
+            : "r"(src1_amp), "r"(0xffff7fff));
+    }
+}
+
+/* Copy src to out, scaled by amp unless unity; size: nonzero multiple of 4 */
+static FORCE_INLINE void write_samples(void *out,
+                                       void *src,
+                                       int32_t amp,
+                                       size_t size)
+{
+    if (LIKELY(amp == MIX_AMP_UNITY))
+    {
+        /* Channel is unity amplitude - straight copy */
+        asm volatile (
+            "ands       r1, %2, #0x1f  \n" /* r1 = size % 32 */
+            "beq        2f             \n" /* none: block copy only */
+        "1:                            \n"
+            "ldr        r0, [%1], #4   \n" /* copy r1 bytes word by word */
+            "subs       r1, r1, #4     \n"
+            "str        r0, [%0], #4   \n"
+            "bne        1b             \n"
+            "bics       %2, %2, #0x1f  \n" /* size = what is left */
+            "beq        3f             \n" /* nothing: done */
+        "2:                            \n"
+            "ldmia      %1!, { r0-r7 } \n" /* copy 32 bytes per pass */
+            "subs       %2, %2, #32    \n"
+            "stmia      %0!, { r0-r7 } \n"
+            "bhi        2b             \n"
+        "3:                            \n"
+            : "+r"(out), "+r"(src), "+r"(size)
+            :
+            : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7");
+    }
+    else
+    {
+        /* Channel needs amplitude cut */
+        uint32_t l, h;
+        asm volatile (
+        "1:                              \n"
+            "ldrsh   %3, [%1], #2        \n" /* l = 1st sample */
+            "ldrsh   %4, [%1], #2        \n" /* h = 2nd sample */
+            "subs    %2, %2, #4          \n" /* flags kept for bhi below */
+            "mul     %3, %5, %3          \n" /* l *= amp */
+            "mul     %4, %5, %4          \n" /* h *= amp */
+            "and     %4, %4, %6, lsl #16 \n" /* h &= 0xffff0000 */
+            "orr     %4, %4, %3, lsr #16 \n" /* h |= l >> 16 (logical) */
+            "str     %4, [%0], #4        \n" /* store the packed pair */
+            "bhi     1b                  \n" /* loop while size > 0 */
+            : "+r"(out), "+r"(src), "+r"(size),
+              "=&r"(l), "=&r"(h)
+            : "r"(amp), "r"(0xffffffffu));
+    }     
+}
diff --git a/firmware/target/arm/pcm-mixer-armv5.c b/firmware/target/arm/pcm-mixer-armv5.c
new file mode 100644
index 0000000..64f2c86
--- /dev/null
+++ b/firmware/target/arm/pcm-mixer-armv5.c
@@ -0,0 +1,106 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2011 by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#define MIXER_OPTIMIZED_WRITE_SAMPLES
+#define MIXER_OPTIMIZED_MIX_SAMPLES
+
+/* Mix two sources into out; both amps are always applied, result clamped */
+static FORCE_INLINE void mix_samples(void *out,
+                                     void *src0,
+                                     int32_t src0_amp,
+                                     void *src1,
+                                     int32_t src1_amp,
+                                     size_t size) /* bytes; 4 per pass, >= 1 pass */
+{
+    int32_t s0, s1, tmp;
+    asm volatile (
+    "1:                             \n"
+        "ldr    %4, [%1], #4        \n" /* s0 = sample pair of src0 */
+        "ldr    %5, [%2], #4        \n" /* s1 = sample pair of src1 */
+        "smulwb %6, %7, %4          \n" /* tmp = amp0 * s0.lo >> 16 */
+        "smulwt %4, %7, %4          \n" /* s0 = amp0 * s0.hi >> 16 */
+        "smlawb %6, %8, %5, %6      \n" /* tmp += amp1 * s1.lo >> 16 */
+        "smlawt %4, %8, %5, %4      \n" /* s0 += amp1 * s1.hi >> 16 */
+        "mov    %5, %6, asr #15     \n" /* clamp tmp: bits 31..15 */
+        "teq    %5, %5, asr #31     \n" /* ...all the same? */
+        "eorne  %6, %9, %6, asr #31 \n" /* no: low half = 7fff/8000 */
+        "mov    %5, %4, asr #15     \n" /* same clamp for s0 */
+        "teq    %5, %5, asr #31     \n"
+        "eorne  %4, %9, %4, asr #31 \n"
+        "subs   %3, %3, #4          \n" /* size -= 4 */
+        "and    %6, %6, %9, lsr #16 \n" /* keep low halfword of tmp */
+        "orr    %6, %6, %4, lsl #16 \n" /* s0 into the high halfword */
+        "str    %6, [%0], #4        \n" /* store the packed pair */
+        "bhi    1b                  \n" /* loop while size > 0 */
+        : "+r"(out), "+r"(src0), "+r"(src1), "+r"(size),
+          "=&r"(s0), "=&r"(s1), "=&r"(tmp)
+        : "r"(src0_amp), "r"(src1_amp), "r"(0xffff7fff));
+}
+
+/* Copy src to out, scaled by amp unless unity; size: nonzero multiple of 4 */
+static FORCE_INLINE void write_samples(void *out,
+                                       void *src,
+                                       int32_t amp,
+                                       size_t size)
+{
+    if (LIKELY(amp == MIX_AMP_UNITY))
+    {
+        /* Channel is unity amplitude - straight copy */
+        asm volatile (
+            "ands   r1, %2, #0x1f  \n" /* r1 = size % 32 */
+            "beq    2f             \n" /* none: block copy only */
+        "1:                        \n"
+            "ldr    r0, [%1], #4   \n" /* copy r1 bytes word by word */
+            "subs   r1, r1, #4     \n"
+            "str    r0, [%0], #4   \n"
+            "bne    1b             \n"
+            "bics   %2, %2, #0x1f  \n" /* size = what is left */
+            "beq    3f             \n" /* nothing: done */
+        "2:                        \n"
+            "ldmia  %1!, { r0-r7 } \n" /* copy 32 bytes per pass */
+            "subs   %2, %2, #32    \n"
+            "stmia  %0!, { r0-r7 } \n"
+            "bhi    2b             \n"
+        "3:                        \n"
+            : "+r"(out), "+r"(src), "+r"(size)
+            :
+            : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7");
+    }
+    else
+    {
+        /* Channel needs amplitude cut */
+        uint32_t l, h;
+        asm volatile (
+        "1:                             \n"
+            "ldr    %3, [%1], #4        \n" /* l = sample pair */
+            "subs   %2, %2, #4          \n" /* flags kept for bhi below */
+            "smulwt %4, %5, %3          \n" /* h = amp * l.hi >> 16 */
+            "smulwb %3, %5, %3          \n" /* l = amp * l.lo >> 16 */
+            "mov    %4, %4, lsl #16     \n" /* h to the high halfword */
+            "mov    %3, %3, lsl #16     \n" /* drop l's upper bits... */
+            "orr    %4, %4, %3, lsr #16 \n" /* ...and merge as low half */
+            "str    %4, [%0], #4        \n" /* store the packed pair */
+            "bhi    1b                  \n" /* loop while size > 0 */
+            : "+r"(out), "+r"(src), "+r"(size),
+              "=&r"(l), "=&r"(h)
+            : "r"(amp));
+    }     
+}
diff --git a/firmware/target/arm/pcm-mixer-armv6.c b/firmware/target/arm/pcm-mixer-armv6.c
new file mode 100644
index 0000000..94eecd0
--- /dev/null
+++ b/firmware/target/arm/pcm-mixer-armv6.c
@@ -0,0 +1,118 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2011 by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#define MIXER_OPTIMIZED_MIX_SAMPLES
+#define MIXER_OPTIMIZED_WRITE_SAMPLES
+
+/* Mix two sources into out with 16-bit saturation, applying amps if needed */
+static FORCE_INLINE void mix_samples(void *out,
+                                     void *src0,
+                                     int32_t src0_amp,
+                                     void *src1,
+                                     int32_t src1_amp,
+                                     size_t size) /* bytes; 4 per pass, >= 1 pass */
+{
+    uint32_t s0, s1;
+
+    if (src0_amp == MIX_AMP_UNITY && src1_amp == MIX_AMP_UNITY)
+    {
+        /* Both are unity amplitude - one packed add per sample pair */
+        asm volatile (
+        "1:                      \n"
+            "ldr    %4, [%1], #4 \n" /* s0 = sample pair of src0 */
+            "ldr    %5, [%2], #4 \n" /* s1 = sample pair of src1 */
+            "subs   %3, %3, #4   \n" /* flags kept for bhi below */
+            "qadd16 %5, %5, %4   \n" /* saturating add per halfword */
+            "str    %5, [%0], #4 \n"
+            "bhi    1b           \n" /* loop while size > 0 */
+            : "+r"(out), "+r"(src0), "+r"(src1), "+r"(size),
+              "=&r"(s0), "=&r"(s1));
+    }
+    else
+    {
+        /* One or neither are unity amplitude - both amps are applied */
+        uint32_t tmp;
+        asm volatile (
+        "1:                             \n"
+            "ldr    %4, [%1], #4        \n" /* s0 = sample pair of src0 */
+            "ldr    %5, [%2], #4        \n" /* s1 = sample pair of src1 */
+            "subs   %3, %3, #4          \n" /* flags kept for bhi below */
+            "smulwb %6, %7, %4          \n" /* tmp = amp0 * s0.lo >> 16 */
+            "smulwt %4, %7, %4          \n" /* s0 = amp0 * s0.hi >> 16 */
+            "smlawb %6, %8, %5, %6      \n" /* tmp += amp1 * s1.lo >> 16 */
+            "smlawt %4, %8, %5, %4      \n" /* s0 += amp1 * s1.hi >> 16 */
+            "ssat   %6, #16, %6         \n" /* clamp both to 16 bits */
+            "ssat   %4, #16, %4         \n"
+            "pkhbt  %6, %6, %4, asl #16 \n" /* pack tmp.lo | s0 << 16 */
+            "str    %6, [%0], #4        \n"
+            "bhi    1b                  \n" /* loop while size > 0 */
+            : "+r"(out), "+r"(src0), "+r"(src1), "+r"(size),
+              "=&r"(s0), "=&r"(s1), "=&r"(tmp)
+            : "r"(src0_amp), "r"(src1_amp));
+    }
+}
+
+/* Copy src to out, scaled by amp unless unity; size: nonzero multiple of 4 */
+static FORCE_INLINE void write_samples(void *out,
+                                       void *src,
+                                       int32_t amp,
+                                       size_t size)
+{
+    if (LIKELY(amp == MIX_AMP_UNITY))
+    {
+        /* Channel is unity amplitude - straight copy */
+        asm volatile (
+            "ands       r1, %2, #0x1f  \n" /* r1 = size % 32 */
+            "beq        2f             \n" /* none: block copy only */
+        "1:                            \n"
+            "ldr        r0, [%1], #4   \n" /* copy r1 bytes word by word */
+            "subs       r1, r1, #4     \n"
+            "str        r0, [%0], #4   \n"
+            "bne        1b             \n"
+            "bics       %2, %2, #0x1f  \n" /* size = what is left */
+            "beq        3f             \n" /* nothing: done */
+        "2:                            \n"
+            "ldmia      %1!, { r0-r7 } \n" /* copy 32 bytes per pass */
+            "subs       %2, %2, #32    \n"
+            "stmia      %0!, { r0-r7 } \n"
+            "bhi        2b             \n"
+        "3:                            \n"
+            : "+r"(out), "+r"(src), "+r"(size)
+            :
+            : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7");
+    }
+    else
+    {
+        /* Channel needs amplitude cut */
+        uint32_t s, tmp;
+        asm volatile(
+        "1:                             \n"
+            "ldr    %3, [%1], #4        \n" /* s = sample pair */
+            "subs   %2, %2, #4          \n" /* flags kept for bhi below */
+            "smulwt %4, %5, %3          \n" /* tmp = amp * s.hi >> 16 */
+            "smulwb %3, %5, %3          \n" /* s = amp * s.lo >> 16 */
+            "pkhbt  %4, %3, %4, asl #16 \n" /* pack s.lo | tmp << 16 */
+            "str    %4, [%0], #4        \n"
+            "bhi    1b                  \n" /* loop while size > 0 */
+            : "+r"(out), "+r"(src), "+r"(size),
+              "=&r"(s), "=&r"(tmp)
+            : "r"(amp));
+    }     
+}
diff --git a/firmware/target/arm/pcm-pp.c b/firmware/target/arm/pcm-pp.c
index c446f98..704296d 100644
--- a/firmware/target/arm/pcm-pp.c
+++ b/firmware/target/arm/pcm-pp.c
@@ -26,6 +26,7 @@
 #include "sound.h"
 #include "pcm.h"
 #include "pcm_sampr.h"
+#include "pcm-internal.h"
 
 /** DMA **/
 
@@ -115,6 +116,7 @@
 /* NOTE: direct stack use forbidden by GCC stack handling bug for FIQ */
 void ICODE_ATTR __attribute__((interrupt("FIQ"))) fiq_playback(void)
 {
+    bool new_buffer = false;
     register size_t size;
 
     DMA0_STATUS; /* Clear any pending interrupt */
@@ -136,9 +138,14 @@
             /* Set the new DMA values and activate channel */
             DMA0_RAM_ADDR = dma_play_data.addr;
             DMA0_CMD = DMA_PLAY_CONFIG | (size - 4) | DMA_CMD_START;
+
+            if (new_buffer)
+                pcm_play_dma_started_callback();
             return;
         }
 
+        new_buffer = true;
+
         /* Buffer empty.  Try to get more. */
         pcm_play_get_more_callback((void **)&dma_play_data.addr,
                                    &dma_play_data.size);
@@ -181,8 +188,9 @@
      * r0-r3 and r12 is a working register.
      */
     asm volatile (
-        "stmfd   sp!, { r0-r3, lr }  \n" /* stack scratch regs and lr */
+        "stmfd   sp!, { r0-r4, lr }  \n" /* stack scratch regs and lr */
 
+        "mov     r4, #0              \n" /* Was the callback called? */
 #if CONFIG_CPU == PP5002
         "ldr     r12, =0xcf001040    \n" /* Some magic from iPodLinux */
         "ldr     r12, [r12]          \n"
@@ -212,16 +220,13 @@
         "tst     r1, #1              \n" /* two samples (one word) left? */
         "ldrne   r12, [r8], #4       \n" /* load two samples */
         "strne   r12, [r10, %[wr]]   \n" /* write sample 0-1 to IISFIFO_WR */
-
-        "cmp     r9, #0              \n" /* either FIFO is full or source buffer is empty */
-        "bgt     .exit               \n" /* if source buffer is not empty, FIFO must be full */
 #elif SAMPLE_SIZE == 32
     ".check_fifo:                    \n"
         "ldr     r0, [r10, %[cfg]]   \n" /* read IISFIFO_CFG to check FIFO status */
         "and     r0, r0, %[mask]     \n" /* r0 = IIS_TX_FREE_COUNT << 23 (PP5002) */
 
         "movs    r1, r0, lsr #24     \n" /* number of free pairs of FIFO slots */
-        "beq     .exit               \n" /* no complete pair? -> exit */
+        "beq     .fifo_fill_complete \n" /* no complete pair? -> exit */
         "cmp     r1, r9, lsr #2      \n" /* number of words from source */
         "movgt   r1, r9, lsr #2      \n" /* r1 = amount of allowed loops */
         "sub     r9, r9, r1, lsl #2  \n" /* r1 words will be written in following loop */
@@ -234,11 +239,23 @@
         "subs    r1, r1, #1          \n" /* one more loop? */
         "bgt     .fifo_loop          \n" /* yes, continue */
 
+    ".fifo_fill_complete:            \n"
+#endif
+        "cmp     r4, #0              \n" /* If fill came after get_more... */
+        "beq     .still_old_buffer   \n"
+        "mov     r4, #0              \n"
+        "ldr     r2, =pcm_play_dma_started \n"
+        "ldrne   r2, [r2]            \n"
+        "cmp     r2, #0              \n"
+        "movne   lr, pc              \n"
+        "bxne    r2                  \n"
+
+    ".still_old_buffer:              \n"
         "cmp     r9, #0              \n" /* either FIFO is full or source buffer is empty */
         "bgt     .exit               \n" /* if source buffer is not empty, FIFO must be full */
-#endif
 
     ".more_data:                     \n"
+        "mov     r4, #1              \n" /* Remember we did this */
         "ldr     r2, =pcm_play_get_more_callback \n"
         "mov     r0, r11             \n" /* r0 = &p */
         "add     r1, r11, #4         \n" /* r1 = &size */
@@ -250,7 +267,7 @@
 
     ".exit:                          \n" /* (r9=0 if stopping, look above) */
         "stmia   r11, { r8-r9 }      \n" /* save p and size */
-        "ldmfd   sp!, { r0-r3, lr }  \n"
+        "ldmfd   sp!, { r0-r4, lr }  \n"
         "subs    pc, lr, #4          \n" /* FIQ specific return sequence */
         ".ltorg                      \n"
         : /* These must only be integers! No regs */
@@ -264,6 +281,8 @@
 /* NOTE: direct stack use forbidden by GCC stack handling bug for FIQ */
 void fiq_playback(void)
 {
+    bool new_buffer = false;
+
 #if CONFIG_CPU == PP5002
     inl(0xcf001040);
 #endif
@@ -271,6 +290,10 @@
     do {
         while (dma_play_data.size > 0) {
             if (IIS_TX_FREE_COUNT < 2) {
+                if (new_buffer) {
+                    new_buffer = false;
+                    pcm_play_dma_started_callback();
+                }
                 return;
             }
 #if SAMPLE_SIZE == 16
@@ -282,9 +305,15 @@
             dma_play_data.size -= 4;
         }
 
+        if (new_buffer) {
+            new_buffer = false;
+            pcm_play_dma_started_callback();
+        }
+
         /* p is empty, get some more data */
         pcm_play_get_more_callback((void **)&dma_play_data.addr,
                                    &dma_play_data.size);
+        new_buffer = true;
     } while (dma_play_data.size);
 
     /* No more data  */
diff --git a/firmware/target/arm/pcm-telechips.c b/firmware/target/arm/pcm-telechips.c
index 851ebee..aff4317 100644
--- a/firmware/target/arm/pcm-telechips.c
+++ b/firmware/target/arm/pcm-telechips.c
@@ -27,6 +27,7 @@
 #include "sound.h"
 #include "i2s.h"
 #include "pcm.h"
+#include "pcm-internal.h"
 
 struct dma_data
 {
@@ -247,6 +248,8 @@
      * r0-r3 and r12 is a working register.
      */
     asm volatile (
+        "stmfd   sp!, { r0-r4, lr }  \n" /* stack scratch regs and lr */
+        "mov     r4, #0              \n" /* Was the callback called? */
 #if defined(CPU_TCC780X)
         "mov     r8, #0xc000         \n" /* DAI_TX_IRQ_MASK | DAI_RX_IRQ_MASK */
         "ldr     r9, =0xf3001004     \n" /* CREQ */
@@ -279,33 +282,41 @@
         "sub     r9, r9, #0x10       \n" /* 4 words written */
         "stmia   r11, { r8-r9 }      \n" /* save p and size */
 
+        "cmp     r4, #0              \n" /* Callback called? */
+        "beq     .exit               \n"
+        /* "mov     r4, #0              \n" If get_more could be called multiple times! */
+        "ldr     r2, =pcm_play_dma_started\n"
+        "ldr     r2, [r2]            \n"
+        "cmp     r2, #0              \n"
+        "blxne   r2                  \n"
+
     ".exit:                          \n"
+        "ldmfd   sp!, { r0-r4, lr }  \n"
         "subs    pc, lr, #4          \n" /* FIQ specific return sequence */
 
     ".more_data:                     \n"
-        "stmfd   sp!, { r0-r3, lr }  \n" /* stack scratch regs and lr */
+        "mov     r4, #1              \n" /* Remember we got more data in this FIQ */
         "ldr     r2, =pcm_play_get_more_callback \n"
         "mov     r0, r11             \n" /* r0 = &p */
         "add     r1, r11, #4         \n" /* r1 = &size */
         "blx     r2                  \n" /* call pcm_play_get_more_callback */
         "ldmia   r11, { r8-r9 }      \n" /* load new p and size */
         "cmp     r9, #0x10           \n" /* did we actually get enough data? */
-        "ldmfd   sp!, { r0-r3, lr }  \n"
         "bpl     .fill_fifo          \n" /* not stop and enough? refill */
         "b       .exit               \n"
         ".ltorg                      \n"
     );
 }
 #else /* C version for reference */
-void fiq_handler(void) ICODE_ATTR __attribute__((naked));
+void fiq_handler(void) ICODE_ATTR;
 void fiq_handler(void)
 {
-    asm volatile(   "stmfd sp!, {r0-r7, ip, lr} \n"   /* Store context */
-                    "sub   sp, sp, #8           \n"); /* Reserve stack */
+    register bool new_buffer = false;
 
     if (dma_play_data.size < 16)
     {
         /* p is empty, get some more data */
+        new_buffer = true;
         pcm_play_get_more_callback((void**)&dma_play_data.p,
                                    &dma_play_data.size);
     }
@@ -327,9 +338,8 @@
     /* Clear FIQ status */
     CREQ = DAI_TX_IRQ_MASK | DAI_RX_IRQ_MASK;
 
-    asm volatile(   "add   sp, sp, #8           \n"   /* Cleanup stack   */
-                    "ldmfd sp!, {r0-r7, ip, lr} \n"   /* Restore context */
-                    "subs  pc, lr, #4           \n"); /* Return from FIQ */
+    if (new_buffer)
+        pcm_play_dma_started_callback();
 }
 #endif
 
diff --git a/firmware/target/arm/pnx0101/pcm-pnx0101.c b/firmware/target/arm/pnx0101/pcm-pnx0101.c
index 9d4ffbd..d4c1745 100644
--- a/firmware/target/arm/pnx0101/pcm-pnx0101.c
+++ b/firmware/target/arm/pnx0101/pcm-pnx0101.c
@@ -21,6 +21,7 @@
 #include "system.h"
 #include "audio.h"
 #include "string.h"
+#include "pcm-internal.h"
 
 #define DMA_BUF_SAMPLES 0x100
 
@@ -63,6 +64,8 @@
 
     if (pcm_playing && !pcm_paused)
     {
+        bool new_buffer =false;
+
         do
         {
             int count;
@@ -102,10 +105,20 @@
                 count--;
             }
             p = tmp_p;
+
+            if (new_buffer)
+            {
+                new_buffer = false;
+                pcm_play_dma_started_callback();
+            }
+
             if (l >= lend)
                 return;
 
             pcm_play_get_more_callback((void**)&p, &p_size);
+
+            if (p_size)
+                new_buffer = true;
         }
         while (p_size);
     }
diff --git a/firmware/target/arm/s3c2440/gigabeat-fx/pcm-meg-fx.c b/firmware/target/arm/s3c2440/gigabeat-fx/pcm-meg-fx.c
index c1c9017..33194ae 100644
--- a/firmware/target/arm/s3c2440/gigabeat-fx/pcm-meg-fx.c
+++ b/firmware/target/arm/s3c2440/gigabeat-fx/pcm-meg-fx.c
@@ -25,6 +25,7 @@
 #include "audio.h"
 #include "sound.h"
 #include "file.h"
+#include "pcm-internal.h"
 
 /* PCM interrupt routine lockout */
 static struct
@@ -235,6 +236,8 @@
 
     /* Re-Activate the channel */
     DMASKTRIG2 = 0x2;
+
+    pcm_play_dma_started_callback();
 }
 
 size_t pcm_get_bytes_waiting(void)
diff --git a/firmware/target/arm/s3c2440/gigabeat-fx/wmcodec-meg-fx.c b/firmware/target/arm/s3c2440/gigabeat-fx/wmcodec-meg-fx.c
index 01b177d..eea4c58 100644
--- a/firmware/target/arm/s3c2440/gigabeat-fx/wmcodec-meg-fx.c
+++ b/firmware/target/arm/s3c2440/gigabeat-fx/wmcodec-meg-fx.c
@@ -99,14 +99,15 @@
     INTPND = TIMER3_MASK;
 }
 
-void pcmbuf_beep(unsigned int frequency, size_t duration, int amplitude)
+void beep_play(unsigned int frequency, unsigned int duration,
+               unsigned int amplitude)
 {
     #define TIMER3_TICK_SEC (TIMER_FREQ / TIMER234_PRESCALE)
 
     unsigned long tcnt, tcmp;
     int oldstatus;
 
-    if (amplitude <= 0)
+    if (frequency == 0 || duration == 0 || amplitude == 0)
     {
         beep_stop(); /* won't hear it anyway */
         return;
diff --git a/firmware/target/arm/s3c2440/mini2440/pcm-mini2440.c b/firmware/target/arm/s3c2440/mini2440/pcm-mini2440.c
index 8a6b62f..0c69c1e 100644
--- a/firmware/target/arm/s3c2440/mini2440/pcm-mini2440.c
+++ b/firmware/target/arm/s3c2440/mini2440/pcm-mini2440.c
@@ -26,6 +26,7 @@
 #include "audio.h"
 #include "sound.h"
 #include "file.h"
+#include "pcm-internal.h"
 
 /* PCM interrupt routine lockout */
 static struct
@@ -275,6 +276,8 @@
 
     /* Re-Activate the channel */
     DMASKTRIG2 = 0x2;
+
+    pcm_play_dma_started_callback();
 }
 
 size_t pcm_get_bytes_waiting(void)
diff --git a/firmware/target/arm/s5l8700/pcm-s5l8700.c b/firmware/target/arm/s5l8700/pcm-s5l8700.c
index 08086c3..14c515e 100644
--- a/firmware/target/arm/s5l8700/pcm-s5l8700.c
+++ b/firmware/target/arm/s5l8700/pcm-s5l8700.c
@@ -27,6 +27,7 @@
 #include "panic.h"
 #include "audiohw.h"
 #include "pcm.h"
+#include "pcm-internal.h"
 #include "pcm_sampr.h"
 #include "dma-target.h"
 #include "mmu-arm.h"
@@ -100,6 +101,7 @@
 void INT_DMA(void) ICODE_ATTR;
 void INT_DMA(void)
 {
+    bool new_buffer = false;
     DMACOM0 = 7;
     while (!(DMACON0 & (1 << 18)))
     {
@@ -112,8 +114,12 @@
         }
         else
         {
-            if (!nextsize) pcm_play_get_more_callback((void**)&nextbuf, &nextsize);
-            if (!nextsize) break;
+            if (!nextsize)
+            {
+                pcm_play_get_more_callback((void**)&nextbuf, &nextsize);
+                if (!nextsize) break;
+                new_buffer = true;
+            }
             queuedsize = MIN(sizeof(dblbuf), nextsize / 2);
             nextsize -= queuedsize;
             queuedbuf = nextbuf + nextsize;
@@ -124,7 +130,14 @@
         clean_dcache();
         DMACOM0 = 4;
         DMACOM0 = 7;
+
+        if (new_buffer)
+        {
+            pcm_play_dma_started_callback();
+            new_buffer = false;
+        }
     }
+
 }
 
 void pcm_play_dma_start(const void* addr, size_t size)
diff --git a/firmware/target/arm/s5l8702/pcm-s5l8702.c b/firmware/target/arm/s5l8702/pcm-s5l8702.c
index c0498a9..dbadf3b 100644
--- a/firmware/target/arm/s5l8702/pcm-s5l8702.c
+++ b/firmware/target/arm/s5l8702/pcm-s5l8702.c
@@ -27,6 +27,7 @@
 #include "panic.h"

 #include "audiohw.h"

 #include "pcm.h"

+#include "pcm-internal.h"

 #include "pcm_sampr.h"

 #include "mmu-arm.h"

 #include "pcm-target.h"

@@ -113,6 +114,8 @@
         DMAC0C0CONFIG = 0x8a81;

     }

     else DMAC0C0NEXTLLI = pcm_lli;

+

+    pcm_play_dma_started_callback();

 }

 

 void pcm_play_dma_start(const void* addr, size_t size)

diff --git a/firmware/target/arm/tms320dm320/creative-zvm/pcm-creativezvm.c b/firmware/target/arm/tms320dm320/creative-zvm/pcm-creativezvm.c
index 3c54ce8..5ec62cf 100644
--- a/firmware/target/arm/tms320dm320/creative-zvm/pcm-creativezvm.c
+++ b/firmware/target/arm/tms320dm320/creative-zvm/pcm-creativezvm.c
@@ -27,6 +27,7 @@
 #include "dm320.h"
 #include "audiohw.h"
 #include "dsp-target.h"
+#include "pcm-internal.h"
 
 void pcm_play_dma_init(void)
 {
diff --git a/firmware/target/arm/tms320dm320/mrobe-500/pcm-mr500.c b/firmware/target/arm/tms320dm320/mrobe-500/pcm-mr500.c
index fb94ada..90c342e 100644
--- a/firmware/target/arm/tms320dm320/mrobe-500/pcm-mr500.c
+++ b/firmware/target/arm/tms320dm320/mrobe-500/pcm-mr500.c
@@ -28,6 +28,7 @@
 #include "dsp-target.h"
 #include "dsp/ipc.h"
 #include "mmu-arm.h"
+#include "pcm-internal.h"
 
 /* This is global to save some latency when pcm_play_dma_get_peak_buffer is 
  *  called.
@@ -178,6 +179,8 @@
             
             DEBUGF("pcm_sdram at 0x%08lx, sdem_addr 0x%08lx",
                 (unsigned long)start, (unsigned long)sdem_addr);
+
+            pcm_play_dma_started_callback();
         }
         
         break;
diff --git a/firmware/target/coldfire/pcm-coldfire.c b/firmware/target/coldfire/pcm-coldfire.c
index a06542c..85eeaec 100644
--- a/firmware/target/coldfire/pcm-coldfire.c
+++ b/firmware/target/coldfire/pcm-coldfire.c
@@ -28,6 +28,7 @@
 #if defined(HAVE_SPDIF_REC) || defined(HAVE_SPDIF_OUT)
 #include "spdif.h"
 #endif
+#include "pcm-internal.h"
 
 #define IIS_PLAY_DEFPARM ( (freq_ent[FPARM_CLOCKSEL] << 12) | \
                            (IIS_PLAY & (7 << 8)) | \
@@ -318,6 +319,9 @@
         SAR0 = (unsigned long)start;     /* Source address */
         BCR0 = size;                     /* Bytes to transfer */
         or_l(DMA_EEXT | DMA_INT, &DCR0); /* per request and int ON */
+
+        /* Call buffer callback */
+        pcm_play_dma_started_callback();
     }
     /* else inished playing */
 } /* DMA0 */
diff --git a/firmware/target/coldfire/pcm-mixer-coldfire.c b/firmware/target/coldfire/pcm-mixer-coldfire.c
new file mode 100644
index 0000000..d8318ff
--- /dev/null
+++ b/firmware/target/coldfire/pcm-mixer-coldfire.c
@@ -0,0 +1,134 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2011 by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#define MIXER_OPTIMIZED_MIX_SAMPLES
+#define MIXER_OPTIMIZED_WRITE_SAMPLES
+static struct emac_context /* holding area for the EMAC registers saved below */
+{
+    unsigned long r[4]; /* macsr, accext01, acc0, acc1 (movem.l store order) */
+} emac_context IBSS_ATTR;
+
+/* Save EMAC state clobbered by mixing in the ISR; acc0/acc1 end up cleared */
+static FORCE_INLINE void save_emac_context(void)
+{
+    asm volatile (
+        "move.l   %%macsr, %%d0             \n" /* d0 = macsr */
+        "move.l   %%accext01, %%d1          \n" /* d1 = accext01 */
+        "movclr.l %%acc0, %%a0              \n" /* a0 = acc0, then acc0 = 0 */
+        "movclr.l %%acc1, %%a1              \n" /* a1 = acc1, then acc1 = 0 */
+        "movem.l  %%d0-%%d1/%%a0-%%a1, (%0) \n" /* write all four to emac_context */
+        :
+        : "a"(&emac_context)
+        : "d0", "d1", "a0", "a1");
+}
+
+/* Reload the EMAC state that save_emac_context() stored in emac_context */
+static FORCE_INLINE void restore_emac_context(void)
+{
+    asm volatile (
+        "movem.l (%0), %%d0-%%d1/%%a0-%%a1  \n" /* fetch the four saved words */
+        "move.l  %%a1, %%acc1               \n" /* acc1 = saved acc1 */
+        "move.l  %%a0, %%acc0               \n" /* acc0 = saved acc0 */
+        "move.l  %%d1, %%accext01           \n" /* accext01 = saved extensions */
+        "move.l  %%d0, %%macsr              \n" /* macsr restored last */
+        :
+        : "a"(&emac_context)
+        : "d0", "d1", "a0", "a1");
+}
+
+/* out = src0*src0_amp + src1*src1_amp per 16-bit half; size in bytes, 4/pass */
+static FORCE_INLINE void mix_samples(void *out,
+                                     void *src0,
+                                     int32_t src0_amp,
+                                     void *src1,
+                                     int32_t src1_amp,
+                                     size_t size)
+{
+    uint32_t s0, s1, s2, s3;
+    save_emac_context(); /* caller's EMAC state is put back before returning */
+    coldfire_set_macsr(EMAC_ROUND | EMAC_SATURATE);
+
+    asm volatile (
+        "move.l     (%1)+, %5                 \n" /* preload first src0 word */
+    "1:                                       \n"
+        "movea.w    %5, %4                    \n" /* s0 = low half, sign-extended */
+        "asr.l      %10, %5                   \n" /* s1 = high half, sign-extended */
+        "mac.l      %4, %8,            %%acc0 \n" /* acc0 += low  * src0_amp */
+        "mac.l      %5, %8, (%2)+, %5, %%acc1 \n" /* acc1 += high * src0_amp; s1 = next src1 word */
+        "movea.w    %5, %4                    \n" /* same split for the src1 word */
+        "asr.l      %10, %5                   \n"
+        "mac.l      %4, %9,            %%acc0 \n" /* acc0 += low  * src1_amp */
+        "mac.l      %5, %9, (%1)+, %5, %%acc1 \n" /* acc1 += high * src1_amp; s1 = next src0 word (read-ahead, also on last pass) */
+        "movclr.l   %%acc0, %6                \n" /* s2 = low-half sum, acc0 = 0 */
+        "movclr.l   %%acc1, %7                \n" /* s3 = high-half sum, acc1 = 0 */
+        "swap.w     %6                        \n" /* bring upper 16 bits of s2 down */
+        "move.w     %6, %7                    \n" /* s3 = [upper16(high sum) : upper16(low sum)] */
+        "move.l     %7, (%0)+                 \n" /* store the mixed word */
+        "subq.l     #4, %3                    \n" /* 4 bytes done */
+        "bhi.b      1b                        \n" /* repeat while size is still above zero */
+        : "+a"(out), "+a"(src0), "+a"(src1), "+d"(size),
+          "=&a"(s0), "=&d"(s1), "=&d"(s2), "=&d"(s3) /* %4-%7: scratch */
+        : "r"(src0_amp), "r"(src1_amp), "d"(16) /* %8, %9, %10 (shift count) */
+    );
+
+    restore_emac_context();
+}
+
+/* Copy src to out scaled by amp (size in bytes); memcpy if amp is unity */
+static FORCE_INLINE void write_samples(void *out,
+                                       void *src,
+                                       int32_t amp,
+                                       size_t size)
+{
+    if (LIKELY(amp == MIX_AMP_UNITY))
+    {
+        /* Channel is unity amplitude */
+        memcpy(out, src, size);
+    }
+    else
+    {
+        /* Channel needs amplitude cut */
+        uint32_t s0, s1, s2, s3;
+        save_emac_context(); /* caller's EMAC state is put back below */
+        coldfire_set_macsr(EMAC_ROUND | EMAC_SATURATE);
+
+        asm volatile (
+            "move.l     (%1)+, %4                 \n" /* preload first src word */
+        "1:                                       \n"
+            "movea.w    %4, %3                    \n" /* s0 = low half, sign-extended */
+            "asr.l      %8, %4                    \n" /* s1 = high half, sign-extended */
+            "mac.l      %3, %7,            %%acc0 \n" /* acc0 += low  * amp */
+            "mac.l      %4, %7, (%1)+, %4, %%acc1 \n" /* acc1 += high * amp; s1 = next src word (read-ahead, also on last pass) */
+            "movclr.l   %%acc0, %5                \n" /* s2 = low-half product, acc0 = 0 */
+            "movclr.l   %%acc1, %6                \n" /* s3 = high-half product, acc1 = 0 */
+            "swap.w     %5                        \n" /* bring upper 16 bits of s2 down */
+            "move.w     %5, %6                    \n" /* s3 = [upper16(high) : upper16(low)] */
+            "move.l     %6, (%0)+                 \n" /* store the scaled word */
+            "subq.l     #4, %2                    \n" /* 4 bytes done */
+            "bhi.b      1b                        \n" /* repeat while size is still above zero */
+            : "+a"(out), "+a"(src), "+d"(size),
+              "=&a"(s0), "=&d"(s1), "=&d"(s2), "=&d"(s3) /* %3-%6: scratch */
+            : "r"(amp), "d"(16) /* %7, %8 (shift count) */
+        );
+
+        restore_emac_context();
+    }     
+}
diff --git a/firmware/target/hosted/android/pcm-android.c b/firmware/target/hosted/android/pcm-android.c
index 88792cd..cbd6cb3 100644
--- a/firmware/target/hosted/android/pcm-android.c
+++ b/firmware/target/hosted/android/pcm-android.c
@@ -23,14 +23,18 @@
 #include <stdbool.h>
 #define _SYSTEM_WITH_JNI /* for getJavaEnvironment */
 #include <system.h>
+#include <pthread.h>
 #include "debug.h"
 #include "pcm.h"
+#include "pcm-internal.h"
 
 extern JNIEnv *env_ptr;
 
 /* infos about our pcm chunks */
 static size_t  pcm_data_size;
 static char   *pcm_data_start;
+static int     audio_locked = 0;
+static pthread_mutex_t audio_lock_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 /* cache frequently called methods */
 static jmethodID play_pause_method;
@@ -42,6 +46,20 @@
 
 
 /*
+ * mutex lock/unlock wrappers for neatness' sake
+ */
+static inline void lock_audio(void)
+{
+    pthread_mutex_lock(&audio_lock_mutex); /* waits until the mutex is free */
+}
+
+static inline void unlock_audio(void)
+{
+    pthread_mutex_unlock(&audio_lock_mutex); /* counterpart of lock_audio() */
+}
+
+
+/*
  * write pcm samples to the hardware. Calls AudioTrack.write directly (which
  * is usually a blocking call)
  *
@@ -54,10 +72,17 @@
 Java_org_rockbox_RockboxPCM_nativeWrite(JNIEnv *env, jobject this,
                                         jbyteArray temp_array, jint max_size)
 {
+    bool new_buffer = false;
+
+    lock_audio();
+
     jint left = max_size;
 
     if (!pcm_data_size) /* get some initial data */
+    {
+        new_buffer = true;
         pcm_play_get_more_callback((void**) &pcm_data_start, &pcm_data_size);
+    }
 
     while(left > 0 && pcm_data_size)
     {
@@ -70,23 +95,49 @@
 
         ret = (*env)->CallIntMethod(env, this, write_method,
                                             temp_array, 0, transfer_size);
+
+        if (new_buffer)
+        {
+            new_buffer = false;
+            pcm_play_dma_started_callback();
+
+            /* NOTE: might need to release the mutex and sleep here if the
+               buffer is shorter than the required buffer (like pcm-sdl.c) to
+               have the mixer clocked at a regular interval */
+        }
+
         if (ret < 0)
+        {
+            unlock_audio();
             return ret;
+        }
 
         if (pcm_data_size == 0) /* need new data */
+        {
+            new_buffer = true;
             pcm_play_get_more_callback((void**)&pcm_data_start, &pcm_data_size);
+        }
         else /* increment data pointer and feed more */
             pcm_data_start += transfer_size;
     }
+
+    if (new_buffer && pcm_data_size)
+        pcm_play_dma_started_callback();
+
+    unlock_audio();
     return max_size - left;
 }
 
 void pcm_play_lock(void)
 {
+    if (++audio_locked == 1) /* nested calls only count; 0 -> 1 takes the mutex */
+        lock_audio();
 }
 
 void pcm_play_unlock(void)
 {
+    if (--audio_locked == 0) /* release only when the outermost lock is undone */
+        unlock_audio();
 }
 
 void pcm_dma_apply_settings(void)
@@ -153,8 +204,6 @@
     set_volume_method = e->GetMethodID(env_ptr, RockboxPCM_class, "set_volume", "(I)V");
     stop_method       = e->GetMethodID(env_ptr, RockboxPCM_class, "stop", "()V");
     write_method      = e->GetMethodID(env_ptr, RockboxPCM_class, "write", "([BII)I");
-    /* get initial pcm data, if any */
-    pcm_play_get_more_callback((void*)&pcm_data_start, &pcm_data_size);
 }
 
 void pcm_postinit(void)
@@ -173,6 +222,7 @@
     JNIEnv e = *env_ptr;
     jmethodID release = e->GetMethodID(env_ptr, RockboxPCM_class, "release", "()V");
     e->CallVoidMethod(env_ptr, RockboxPCM_instance, release);
+    pthread_mutex_destroy(&audio_lock_mutex);
 }
     
 /* Due to limitations of default_event_handler(), parameters gets swallowed when
diff --git a/firmware/target/hosted/maemo/pcm-gstreamer.c b/firmware/target/hosted/maemo/pcm-gstreamer.c
index e3e40f0..6069801 100644
--- a/firmware/target/hosted/maemo/pcm-gstreamer.c
+++ b/firmware/target/hosted/maemo/pcm-gstreamer.c
@@ -54,6 +54,7 @@
 #endif
 
 #include "pcm.h"
+#include "pcm-internal.h"
 #include "pcm_sampr.h"
 
 /*#define LOGF_ENABLE*/
@@ -182,6 +183,8 @@
 
         if (ret != 0)
             DEBUGF("push-buffer error result: %d\n", ret);
+
+        pcm_play_dma_started_callback();
     } else
     {
         DEBUGF("feed_data: No Data.\n");
diff --git a/firmware/target/hosted/sdl/pcm-sdl.c b/firmware/target/hosted/sdl/pcm-sdl.c
index 7780083..dfdd90f 100644
--- a/firmware/target/hosted/sdl/pcm-sdl.c
+++ b/firmware/target/hosted/sdl/pcm-sdl.c
@@ -30,6 +30,7 @@
 #include "sound.h"
 #include "audiohw.h"
 #include "system.h"
+#include "panic.h"
 
 #ifdef HAVE_RECORDING
 #include "audiohw.h"
@@ -39,6 +40,7 @@
 #endif
 
 #include "pcm.h"
+#include "pcm-internal.h"
 #include "pcm_sampr.h"
 
 /*#define LOGF_ENABLE*/
@@ -71,15 +73,19 @@
 
 static SDL_AudioSpec obtained;
 static SDL_AudioCVT cvt;
+static int audio_locked = 0;
+static SDL_mutex *audio_lock;
 
 void pcm_play_lock(void)
 {
-    SDL_LockAudio();
+    if (++audio_locked == 1) /* nested calls only count; 0 -> 1 takes audio_lock */
+        SDL_LockMutex(audio_lock);
 }
 
 void pcm_play_unlock(void)
 {
-    SDL_UnlockAudio();
+    if (--audio_locked == 0) /* release only when the outermost lock is undone */
+        SDL_UnlockMutex(audio_lock);
 }
 
 static void pcm_dma_apply_settings_nolock(void)
@@ -227,14 +233,19 @@
 static void sdl_audio_callback(struct pcm_udata *udata, Uint8 *stream, int len)
 {
     logf("sdl_audio_callback: len %d, pcm %d\n", len, pcm_data_size);
+
+    bool new_buffer = false;
     udata->stream = stream;
 
+    SDL_LockMutex(audio_lock);
+
     /* Write what we have in the PCM buffer */
     if (pcm_data_size > 0)
         goto start;
 
     /* Audio card wants more? Get some more then. */
     while (len > 0) {
+        new_buffer = true;
         pcm_play_get_more_callback((void **)&pcm_data, &pcm_data_size);
     start:
         if (pcm_data_size != 0) {
@@ -246,6 +257,28 @@
             udata->num_in  *= pcm_sample_bytes;
             udata->num_out *= pcm_sample_bytes;
 
+
+            if (new_buffer)
+            {
+                new_buffer = false;
+                pcm_play_dma_started_callback();
+
+                if ((size_t)len > udata->num_out)
+                {
+                    int delay = pcm_data_size*250 / pcm_sampr - 1;
+                
+                    if (delay > 0)
+                    {
+                        SDL_UnlockMutex(audio_lock);
+                        SDL_Delay(delay);
+                        SDL_LockMutex(audio_lock);
+
+                        if (!pcm_is_playing())
+                            break;
+                    }
+                }
+            }
+
             pcm_data      += udata->num_in;
             pcm_data_size -= udata->num_in;
             udata->stream += udata->num_out;
@@ -255,6 +288,8 @@
             break;
         }
     }
+
+    SDL_UnlockMutex(audio_lock);
 }
 
 const void * pcm_play_dma_get_peak_buffer(int *count)
@@ -320,6 +355,14 @@
         return;
     }
 
+    audio_lock = SDL_CreateMutex();
+
+    if (!audio_lock)
+    {
+        panicf("Could not create audio_lock\n");
+        return;
+    }
+
     SDL_AudioSpec wanted_spec;
 #ifdef DEBUG
     udata.debug = NULL;
diff --git a/firmware/target/mips/ingenic_jz47xx/pcm-jz4740.c b/firmware/target/mips/ingenic_jz47xx/pcm-jz4740.c
index 5cd9c33..cfc3c9ef 100644
--- a/firmware/target/mips/ingenic_jz47xx/pcm-jz4740.c
+++ b/firmware/target/mips/ingenic_jz47xx/pcm-jz4740.c
@@ -25,6 +25,7 @@
 #include "audio.h"
 #include "sound.h"
 #include "pcm.h"
+#include "pcm-internal.h"
 #include "jz4740.h"
 
 
@@ -109,6 +110,7 @@
     {
         set_dma(start, size);
         REG_DMAC_DCCSR(DMA_AIC_TX_CHANNEL) |= DMAC_DCCSR_EN;
+        pcm_play_dma_started_callback();
     }
 }