X-Git-Url: http://git.osdn.jp/view?a=blobdiff_plain;f=libhb%2Fdecavcodec.c;h=abf7ed179f8cee5a86ca44e56d4d414f8fcf6271;hb=1d6fbf402512f7cbba3c9dac7e10a72aeebd1d81;hp=d050e39f9fde00882b7ae17a28a97af56c1a7159;hpb=78727a276690975bf563af6b4ad012b80b186689;p=handbrake-jp%2Fhandbrake-jp-git.git

diff --git a/libhb/decavcodec.c b/libhb/decavcodec.c
index d050e39f..abf7ed17 100644
--- a/libhb/decavcodec.c
+++ b/libhb/decavcodec.c
@@ -4,10 +4,65 @@
    Homepage: <http://handbrake.fr/>.
    It may be used under the terms of the GNU General Public License. */
 
-#include "hb.h"
+/* This module is Handbrake's interface to the ffmpeg decoder library
+   (libavcodec & small parts of libavformat). It contains four Handbrake
+   "work objects":
+
+    decavcodec  connects HB to an ffmpeg audio decoder
+    decavcodecv connects HB to an ffmpeg video decoder
+
+        (Two different routines are needed because the ffmpeg library
+        has different decoder calling conventions for audio & video.
+        The audio decoder should have had its name changed to "decavcodeca"
+        but I got lazy.) These work objects are self-contained & follow all
+        of HB's conventions for a decoder module. They can be used like
+        any other HB decoder (deca52, decmpeg2, etc.).
+
+    decavcodecai "internal" (incestuous?) version of decavcodec
+    decavcodecvi "internal" (incestuous?) version of decavcodecv
+
+        These routines are functionally equivalent to the routines above but
+        can only be used by the ffmpeg-based stream reader in libhb/stream.c.
+        The reason they exist is because the ffmpeg library leaves some of
+        the information needed by the decoder in the AVStream (the data
+        structure used by the stream reader) and we need to retrieve it
+        to successfully decode frames. But in HB the reader and decoder
+        modules are in completely separate threads and nothing goes between
+        them but hb_buffers containing frames to be decoded. I.e., there's
+        no easy way for the ffmpeg stream reader to pass a pointer to its
+        AVStream over to the ffmpeg video or audio decoder. So the *i work
+        objects use a private back door to the stream reader to get access
+        to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
+        and the codec_param passed to these work objects is the key to this
+        back door (it's basically an index that allows the correct AVStream
+        to be retrieved).
+
+    The normal & *i objects share a lot of code (the basic frame decoding
+    and bitstream info code is factored out into subroutines that can be
+    called by either) but the top level routines of the *i objects
+    (decavcodecviWork, decavcodecviInfo, etc.) are different because:
+     1) they *have* to use the AVCodecContext that's contained in the
+        reader's AVStream rather than just allocating & using their own,
+     2) the Info routines have access to stuff kept in the AVStream in addition
+        to stuff kept in the AVCodecContext. This shouldn't be necessary but
+        crucial information like video frame rate that should be in the
+        AVCodecContext is either missing or wrong in the version of ffmpeg
+        we're currently using.
+
+    A consequence of the above is that the non-i work objects *can't* use
+    information from the AVStream because there isn't one - they get their
+    data from either the dvd reader or the mpeg reader, not the ffmpeg stream
+    reader. That means that they have to make up for deficiencies in the
+    AVCodecContext info by using stuff kept in the HB "title" struct. It
+    also means that ffmpeg codecs that randomly scatter state needed by
+    the decoder across both the AVCodecContext & the AVStream (e.g., the
+    VC1 decoder) can't easily be used by the HB mpeg stream reader.
+ */
 
-#include "libavcodec/avcodec.h"
-#include "libavformat/avformat.h"
+#include "hb.h"
+#include "hbffmpeg.h"
+#include "downmix.h"
+#include "libavcodec/audioconvert.h"
 
 static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
 static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
@@ -26,22 +81,100 @@ hb_work_object_t hb_decavcodec =
     decavcodecBSInfo
 };
 
+#define HEAP_SIZE 8
+typedef struct {
+    // there are nheap items on the heap indexed 1..nheap (i.e., top of
+    // heap is 1). The 0th slot is unused - a marker is put there to check
+    // for overwrite errs.
+    int64_t h[HEAP_SIZE+1];
+    int     nheap;
+} pts_heap_t;
+
 struct hb_work_private_s
 {
-    hb_job_t             *job;
-    AVCodecContext       *context;
+    hb_job_t        *job;
+    AVCodecContext  *context;
     AVCodecParserContext *parser;
-    hb_list_t            *list;
-    double               pts_next;  // next pts we expect to generate
-    int64_t              pts;       // (video) pts passing from parser to decoder
-    int64_t              chap_time; // time of next chap mark (if new_chap != 0)
-    int                  new_chap;
-    int                  ignore_pts; // workaround M$ bugs
-    int                  nframes;
-    int                  ndrops;
-    double               duration;  // frame duration (for video)
+    hb_list_t       *list;
+    double          duration;   // frame duration (for video)
+    double          pts_next;   // next pts we expect to generate
+    int64_t         pts;        // (video) pts passing from parser to decoder
+    int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
+    int             new_chap;   // output chapter mark pending
+    uint32_t        nframes;
+    uint32_t        ndrops;
+    uint32_t        decode_errors;
+    int             brokenByMicrosoft; // video stream may contain packed b-frames
+    hb_buffer_t*    delayq[HEAP_SIZE];
+    pts_heap_t      pts_heap;
+    void*           buffer;
+    struct SwsContext *sws_context; // if we have to rescale or convert color space
+    hb_downmix_t    *downmix;
+    hb_sample_t     *downmix_buffer;
 };
 
+static void decodeAudio( hb_audio_t * audio, hb_work_private_t *pv, uint8_t *data, int size );
+static hb_buffer_t *link_buf_list( hb_work_private_t *pv );
+
+
+static int64_t heap_pop( pts_heap_t *heap )
+{
+    int64_t result;
+
+    if ( heap->nheap <= 0 )
+    {
+        return -1;
+    }
+
+    // return the top of the heap then put the bottom element on top,
+    // decrease the heap size by one & rebalance the heap.
+    result = heap->h[1];
+
+    int64_t v = heap->h[heap->nheap--];
+    int parent = 1;
+    int child = parent << 1;
+    while ( child <= heap->nheap )
+    {
+        // find the smallest of the two children of parent
+        if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
+            ++child;
+
+        if (v <= heap->h[child])
+            // new item is smaller than either child so it's the new parent.
+            break;
+
+        // smallest child is smaller than new item so move it up then
+        // check its children.
+        int64_t hp = heap->h[child];
+        heap->h[parent] = hp;
+        parent = child;
+        child = parent << 1;
+    }
+    heap->h[parent] = v;
+    return result;
+}
+
+static void heap_push( pts_heap_t *heap, int64_t v )
+{
+    if ( heap->nheap < HEAP_SIZE )
+    {
+        ++heap->nheap;
+    }
+
+    // stick the new value on the bottom of the heap then bubble it
+    // up to its correct spot.
+	int child = heap->nheap;
+	while (child > 1) {
+		int parent = child >> 1;
+		if (heap->h[parent] <= v)
+			break;
+		// move parent down
+		int64_t hp = heap->h[parent];
+		heap->h[child] = hp;
+		child = parent;
+	}
+	heap->h[child] = v;
+}
 
 
 /***********************************************************************
@@ -57,16 +190,27 @@ static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
     w->private_data = pv;
 
     pv->job   = job;
+    pv->list  = hb_list_init();
 
     int codec_id = w->codec_param;
     /*XXX*/
     if ( codec_id == 0 )
         codec_id = CODEC_ID_MP2;
+
     codec = avcodec_find_decoder( codec_id );
     pv->parser = av_parser_init( codec_id );
 
     pv->context = avcodec_alloc_context();
-    avcodec_open( pv->context, codec );
+    hb_avcodec_open( pv->context, codec );
+
+    if ( w->audio != NULL &&
+         hb_need_downmix( w->audio->config.in.channel_layout, 
+                          w->audio->config.out.mixdown) )
+    {
+        pv->downmix = hb_downmix_init(w->audio->config.in.channel_layout, 
+                                      w->audio->config.out.mixdown);
+        hb_downmix_set_chan_map( pv->downmix, &hb_smpte_chan_map, &hb_qt_chan_map );
+    }
 
     return 0;
 }
@@ -79,17 +223,47 @@ static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 static void decavcodecClose( hb_work_object_t * w )
 {
     hb_work_private_t * pv = w->private_data;
-    if ( pv->parser )
-	{
-		av_parser_close(pv->parser);
-	}
-    if ( pv->context && pv->context->codec )
-    {
-        avcodec_close( pv->context );
-    }
-    if ( pv->list )
+
+    if ( pv )
     {
-        hb_list_close( &pv->list );
+        if ( pv->job && pv->context && pv->context->codec )
+        {
+            hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
+                    pv->context->codec->name, pv->nframes, pv->decode_errors,
+                    pv->ndrops );
+        }
+        if ( pv->sws_context )
+        {
+            sws_freeContext( pv->sws_context );
+        }
+        if ( pv->parser )
+        {
+            av_parser_close(pv->parser);
+        }
+        if ( pv->context && pv->context->codec )
+        {
+            hb_avcodec_close( pv->context );
+        }
+        if ( pv->list )
+        {
+            hb_list_close( &pv->list );
+        }
+        if ( pv->buffer )
+        {
+            av_free( pv->buffer );
+            pv->buffer = NULL;
+        }
+        if ( pv->downmix )
+        {
+            hb_downmix_close( &(pv->downmix) );
+        }
+        if ( pv->downmix_buffer )
+        {
+            free( pv->downmix_buffer );
+            pv->downmix_buffer = NULL;
+        }
+        free( pv );
+        w->private_data = NULL;
     }
 }
 
@@ -102,80 +276,66 @@ static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
                     hb_buffer_t ** buf_out )
 {
     hb_work_private_t * pv = w->private_data;
-    hb_buffer_t * in = *buf_in, * buf, * last = NULL;
-    int   pos, len, out_size, i, uncompressed_len;
-    short buffer[AVCODEC_MAX_AUDIO_FRAME_SIZE];
-    uint64_t cur;
-    unsigned char *parser_output_buffer;
-    int parser_output_buffer_len;
+    hb_buffer_t * in = *buf_in;
+
+    if ( in->size <= 0 )
+    {
+        /* EOF on input stream - send it downstream & say that we're done */
+        *buf_out = in;
+        *buf_in = NULL;
+        return HB_WORK_DONE;
+    }
 
     *buf_out = NULL;
 
-    cur = ( in->start < 0 )? pv->pts_next : in->start;
+    if ( in->start < -1 && pv->pts_next <= 0 )
+    {
+        // discard buffers that start before video time 0
+        return HB_WORK_OK;
+    }
 
-    pos = 0;
-    while( pos < in->size )
+    // if the packet has a timestamp use it 
+    if ( in->start != -1 )
     {
-        len = av_parser_parse( pv->parser, pv->context,
-                               &parser_output_buffer, &parser_output_buffer_len,
-                               in->data + pos, in->size - pos, cur, cur );
-        out_size = 0;
-        uncompressed_len = 0;
-        if (parser_output_buffer_len)
+        pv->pts_next = in->start;
+    }
+
+    int pos, len;
+    for ( pos = 0; pos < in->size; pos += len )
+    {
+        uint8_t *parser_output_buffer;
+        int parser_output_buffer_len;
+        int64_t cur = pv->pts_next;
+
+        if ( pv->parser != NULL )
         {
-            out_size = sizeof(buffer);
-            uncompressed_len = avcodec_decode_audio2( pv->context, buffer,
-                                                      &out_size,
-                                                      parser_output_buffer,
-                                                      parser_output_buffer_len );
+            len = av_parser_parse2( pv->parser, pv->context,
+                    &parser_output_buffer, &parser_output_buffer_len,
+                    in->data + pos, in->size - pos, cur, cur, AV_NOPTS_VALUE );
         }
-        if( out_size )
+        else
         {
-            short * s16;
-            float * fl32;
-
-            buf = hb_buffer_init( 2 * out_size );
-
-            int sample_size_in_bytes = 2;   // Default to 2 bytes
-            switch (pv->context->sample_fmt)
-            {
-              case SAMPLE_FMT_S16:
-                sample_size_in_bytes = 2;
-                break;
-              /* We should handle other formats here - but that needs additional format conversion work below */
-              /* For now we'll just report the error and try to carry on */
-              default:
-                hb_log("decavcodecWork - Unknown Sample Format from avcodec_decode_audio (%d) !", pv->context->sample_fmt);
-                break;
-            }
-
-            buf->start = cur;
-            buf->stop  = cur + 90000 * ( out_size / (sample_size_in_bytes * pv->context->channels) ) /
-                         pv->context->sample_rate;
-            cur = buf->stop;
-
-            s16  = buffer;
-            fl32 = (float *) buf->data;
-            for( i = 0; i < out_size / 2; i++ )
-            {
-                fl32[i] = s16[i];
-            }
-
-            if( last )
-            {
-                last = last->next = buf;
-            }
-            else
+            parser_output_buffer = in->data;
+            len = parser_output_buffer_len = in->size;
+        }
+        if (parser_output_buffer_len)
+        {
+            // set the duration on every frame since the stream format can
+            // change (it shouldn't but there's no way to guarantee it).
+            // duration is a scaling factor to go from #bytes in the decoded
+            // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
+            // total samples to per-channel samples. 'sample_rate' converts
+            // per-channel samples to seconds and the 90000 converts
+            // seconds to mpeg ticks.
+            if ( pv->context->sample_rate && pv->context->channels )
             {
-                *buf_out = last = buf;
+                pv->duration = 90000. /
+                            (double)( pv->context->sample_rate * pv->context->channels );
             }
+            decodeAudio( w->audio, pv, parser_output_buffer, parser_output_buffer_len );
         }
-
-        pos += len;
     }
-
-    pv->pts_next = cur;
-
+    *buf_out = link_buf_list( pv );
     return HB_WORK_OK;
 }
 
@@ -202,6 +362,7 @@ static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
                              hb_work_info_t *info )
 {
     hb_work_private_t *pv = w->private_data;
+    int ret = 0;
 
     memset( info, 0, sizeof(*info) );
 
@@ -214,18 +375,73 @@ static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
     // now we just return dummy values if there's a codec that will handle it.
     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
                                                            CODEC_ID_MP2 );
-    if ( codec )
+    if ( ! codec )
     {
-        static char codec_name[64];
+        // there's no ffmpeg codec for this audio type - give up
+        return -1;
+    }
 
-        info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
-        info->bitrate = 384000;
-        info->rate = 48000;
-        info->rate_base = 1;
-        info->channel_layout = HB_INPUT_CH_LAYOUT_STEREO;
-        return 1;
+    static char codec_name[64];
+    info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
+
+    AVCodecParserContext *parser = av_parser_init( codec->id );
+    AVCodecContext *context = avcodec_alloc_context();
+    hb_avcodec_open( context, codec );
+    uint8_t *buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
+    int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
+    unsigned char *pbuffer;
+    int pos, pbuffer_size;
+
+    while ( buf && !ret )
+    {
+        pos = 0;
+        while ( pos < buf->size )
+        {
+            int len;
+
+            if (parser != NULL )
+            {
+                len = av_parser_parse2( parser, context, &pbuffer, 
+                                        &pbuffer_size, buf->data + pos, 
+                                        buf->size - pos, buf->start, 
+                                        buf->start, AV_NOPTS_VALUE );
+            }
+            else
+            {
+                pbuffer = buf->data;
+                len = pbuffer_size = buf->size;
+            }
+            pos += len;
+            if ( pbuffer_size > 0 )
+            {
+                AVPacket avp;
+                av_init_packet( &avp );
+                avp.data = pbuffer;
+                avp.size = pbuffer_size;
+
+                len = avcodec_decode_audio3( context, (int16_t*)buffer, 
+                                             &out_size, &avp );
+                if ( len > 0 && context->sample_rate > 0 )
+                {
+                    info->bitrate = context->bit_rate;
+                    info->rate = context->sample_rate;
+                    info->rate_base = 1;
+                    info->channel_layout = 
+                        hb_ff_layout_xlat(context->channel_layout, 
+                                          context->channels);
+                    ret = 1;
+                    break;
+                }
+            }
+        }
+        buf = buf->next;
     }
-    return -1;
+
+    av_free( buffer );
+    if ( parser != NULL )
+        av_parser_close( parser );
+    hb_avcodec_close( context );
+    return ret;
 }
 
 /* -------------------------------------------------------------
@@ -250,18 +466,52 @@ static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride
     return dst;
 }
 
-/* Note: assumes frame format is PIX_FMT_YUV420P */
-static hb_buffer_t *copy_frame( AVCodecContext *context, AVFrame *frame )
+// copy one video frame into an HB buf. If the frame isn't in our color space
+// or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
+// Otherwise just copy the bits.
+static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 {
-    int w = context->width, h = context->height;
-    hb_buffer_t *buf = hb_buffer_init( w * h * 3 / 2 );
+    AVCodecContext *context = pv->context;
+    int w, h;
+    if ( ! pv->job )
+    {
+        // if the dimensions are odd, drop the lsb since h264 requires that
+        // both width and height be even.
+        w = ( context->width >> 1 ) << 1;
+        h = ( context->height >> 1 ) << 1;
+    }
+    else
+    {
+        w =  pv->job->title->width;
+        h =  pv->job->title->height;
+    }
+    hb_buffer_t *buf = hb_video_buffer_init( w, h );
     uint8_t *dst = buf->data;
 
-    dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
-    w >>= 1; h >>= 1;
-    dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
-    dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
+    if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
+         h != context->height )
+    {
+        // have to convert to our internal color space and/or rescale
+        AVPicture dstpic;
+        avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 
+        if ( ! pv->sws_context )
+        {
+            pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
+                                              w, h, PIX_FMT_YUV420P,
+                                              SWS_LANCZOS|SWS_ACCURATE_RND,
+                                              NULL, NULL, NULL );
+        }
+        sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
+                   dstpic.data, dstpic.linesize );
+    }
+    else
+    {
+        dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
+        w = (w + 1) >> 1; h = (h + 1) >> 1;
+        dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
+        dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
+    }
     return buf;
 }
 
@@ -270,23 +520,88 @@ static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
     hb_work_private_t *pv = context->opaque;
     frame->pts = pv->pts;
     pv->pts = -1;
-
     return avcodec_default_get_buffer( context, frame );
 }
 
+static int reget_frame_buf( AVCodecContext *context, AVFrame *frame )
+{
+    hb_work_private_t *pv = context->opaque;
+    frame->pts = pv->pts;
+    pv->pts = -1;
+    return avcodec_default_reget_buffer( context, frame );
+}
+
 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 {
-    hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
-    hb_log( "%s: \"%s\" (%d) at frame %u time %lld", pv->context->codec->name,
-            c->title, chap_num, pv->nframes, pts );
+    hb_chapter_t *c;
+
+    if ( !pv->job )
+        return;
+
+    c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
+    if ( c && c->title )
+    {
+        hb_log( "%s: \"%s\" (%d) at frame %u time %"PRId64,
+                pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
+    }
+    else
+    {
+        hb_log( "%s: Chapter %d at frame %u time %"PRId64,
+                pv->context->codec->name, chap_num, pv->nframes, pts );
+    }
 }
 
-static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
+static void flushDelayQueue( hb_work_private_t *pv )
 {
-    int got_picture;
+    hb_buffer_t *buf;
+    int slot = pv->nframes & (HEAP_SIZE-1);
+
+    // flush all the video packets left on our timestamp-reordering delay q
+    while ( ( buf = pv->delayq[slot] ) != NULL )
+    {
+        buf->start = heap_pop( &pv->pts_heap );
+        hb_list_add( pv->list, buf );
+        pv->delayq[slot] = NULL;
+        slot = ( slot + 1 ) & (HEAP_SIZE-1);
+    }
+}
+
+/*
+ * Decodes a video frame from the specified raw packet data ('data', 'size', 'sequence').
+ * The output of this function is stored in 'pv->list', which contains a list
+ * of zero or more decoded packets.
+ * 
+ * The returned packets are guaranteed to have their timestamps in the correct order,
+ * even if the original packets decoded by libavcodec have misordered timestamps,
+ * due to the use of 'packed B-frames'.
+ * 
+ * Internally the set of decoded packets may be buffered in 'pv->delayq'
+ * until enough packets have been decoded so that the timestamps can be
+ * correctly rewritten, if this is necessary.
+ */
+static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size, int sequence )
+{
+    int got_picture, oldlevel = 0;
     AVFrame frame;
+    AVPacket avp;
 
-    avcodec_decode_video( pv->context, &frame, &got_picture, data, size );
+    if ( global_verbosity_level <= 1 )
+    {
+        oldlevel = av_log_get_level();
+        av_log_set_level( AV_LOG_QUIET );
+    }
+
+    av_init_packet( &avp );
+    avp.data = data;
+    avp.size = size;
+    if ( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 )
+    {
+        ++pv->decode_errors;     
+    }
+    if ( global_verbosity_level <= 1 )
+    {
+        av_log_set_level( oldlevel );
+    }
     if( got_picture )
     {
         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
@@ -301,44 +616,113 @@ static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
         // worked at this point frame.pts should hold the frame's pts from the
         // original data stream or -1 if it didn't have one. in the latter case
         // we generate the next pts in sequence for it.
+        double frame_dur = pv->duration;
+        if ( frame_dur <= 0 )
+        {
+            frame_dur = 90000. * (double)pv->context->time_base.num /
+                        (double)pv->context->time_base.den;
+            pv->duration = frame_dur;
+        }
+        if ( frame.repeat_pict )
+        {
+            frame_dur += frame.repeat_pict * frame_dur * 0.5;
+        }
+        // XXX Unlike every other video decoder, the Raw decoder doesn't
+        //     use the standard buffer allocation routines so we never
+        //     get to put a PTS in the frame. Do it now.
+        if ( pv->context->codec_id == CODEC_ID_RAWVIDEO )
+        {
+            frame.pts = pv->pts;
+            pv->pts = -1;
+        }
+        // If there was no pts for this frame, assume constant frame rate
+        // video & estimate the next frame time from the last & duration.
         double pts = frame.pts;
         if ( pts < 0 )
         {
             pts = pv->pts_next;
         }
-        if ( pv->duration == 0 )
-        {
-            pv->duration = 90000. * pv->context->time_base.num /
-                           pv->context->time_base.den;
-        }
-        double frame_dur = pv->duration;
-        frame_dur += frame.repeat_pict * frame_dur * 0.5;
         pv->pts_next = pts + frame_dur;
 
-        hb_buffer_t *buf = copy_frame( pv->context, &frame );
-        buf->start = pts;
+        hb_buffer_t *buf;
 
-        if ( pv->new_chap && buf->start >= pv->chap_time )
+        // if we're doing a scan or this content couldn't have been broken
+        // by Microsoft we don't worry about timestamp reordering
+        if ( ! pv->job || ! pv->brokenByMicrosoft )
         {
-            buf->new_chap = pv->new_chap;
-            pv->new_chap = 0;
-            pv->chap_time = 0;
-            if ( pv->job )
+            buf = copy_frame( pv, &frame );
+            buf->start = pts;
+            buf->sequence = sequence;
+            if ( pv->new_chap && buf->start >= pv->chap_time )
             {
+                buf->new_chap = pv->new_chap;
+                pv->new_chap = 0;
+                pv->chap_time = 0;
                 log_chapter( pv, buf->new_chap, buf->start );
             }
+            else if ( pv->nframes == 0 && pv->job )
+            {
+                log_chapter( pv, pv->job->chapter_start, buf->start );
+            }
+            hb_list_add( pv->list, buf );
+            ++pv->nframes;
+            return got_picture;
         }
-        else if ( pv->job && pv->nframes == 0 )
+
+        // XXX The following probably addresses a libavcodec bug but I don't
+        //     see an easy fix so we workaround it here.
+        //
+        // The M$ 'packed B-frames' atrocity results in decoded frames with
+        // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
+        // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
+        // The frames are actually delivered in the right order but with
+        // the wrong timestamp. To get the correct timestamp attached to
+        // each frame we have a delay queue (longer than the max number of
+        // b-frames) & a sorting heap for the timestamps. As each frame
+        // comes out of the decoder the oldest frame in the queue is removed
+        // and associated with the smallest timestamp. Then the new frame is
+        // added to the queue & its timestamp is pushed on the heap.
+        // This does nothing if the timestamps are correct (i.e., the video
+        // uses a codec that Micro$oft hasn't broken yet) but the frames
+        // get timestamped correctly even when M$ has munged them.
+
+        // remove the oldest picture from the frame queue (if any) &
+        // give it the smallest timestamp from our heap. The queue size
+        // is a power of two so we get the slot of the oldest by masking
+        // the frame count & this will become the slot of the newest
+        // once we've removed & processed the oldest.
+        int slot = pv->nframes & (HEAP_SIZE-1);
+        if ( ( buf = pv->delayq[slot] ) != NULL )
         {
-            log_chapter( pv, pv->job->chapter_start, buf->start );
+            buf->start = heap_pop( &pv->pts_heap );
+
+            if ( pv->new_chap && buf->start >= pv->chap_time )
+            {
+                buf->new_chap = pv->new_chap;
+                pv->new_chap = 0;
+                pv->chap_time = 0;
+                log_chapter( pv, buf->new_chap, buf->start );
+            }
+            else if ( pv->nframes == 0 && pv->job )
+            {
+                log_chapter( pv, pv->job->chapter_start, buf->start );
+            }
+            hb_list_add( pv->list, buf );
         }
-        hb_list_add( pv->list, buf );
+
+        // add the new frame to the delayq & push its timestamp on the heap
+        buf = copy_frame( pv, &frame );
+        buf->sequence = sequence;
+        pv->delayq[slot] = buf;
+        heap_push( &pv->pts_heap, pts );
+
         ++pv->nframes;
     }
+
     return got_picture;
 }
 
-static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
+static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size, int sequence,
                          int64_t pts, int64_t dts )
 {
     /*
@@ -351,23 +735,31 @@ static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
     do {
         uint8_t *pout;
         int pout_len;
-        int len = av_parser_parse( pv->parser, pv->context, &pout, &pout_len,
-                                   data + pos, size - pos, pts, dts );
+        int len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len,
+                                    data + pos, size - pos, pts, dts, AV_NOPTS_VALUE );
         pos += len;
 
         if ( pout_len > 0 )
         {
             pv->pts = pv->parser->pts;
-            decodeFrame( pv, pout, pout_len );
+            decodeFrame( pv, pout, pout_len, sequence );
         }
     } while ( pos < size );
 
     /* the stuff above flushed the parser, now flush the decoder */
-    while ( size == 0 && decodeFrame( pv, NULL, 0 ) )
+    if ( size <= 0 )
     {
+        while ( decodeFrame( pv, NULL, 0, sequence ) )
+        {
+        }
+        flushDelayQueue( pv );
     }
 }
 
+/*
+ * Removes all packets from 'pv->list', links them together into
+ * a linked-list, and returns the first packet in the list.
+ */
 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 {
     hb_buffer_t *head = hb_list_item( pv->list, 0 );
@@ -404,18 +796,93 @@ static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
     pv->context->opaque = pv;
     pv->context->get_buffer = get_frame_buf;
+    pv->context->reget_buffer = reget_frame_buf;
+
+    return 0;
+}
+
+static int next_hdr( hb_buffer_t *in, int offset )
+{
+    uint8_t *dat = in->data;
+    uint16_t last2 = 0xffff;
+    for ( ; in->size - offset > 1; ++offset )
+    {
+        if ( last2 == 0 && dat[offset] == 0x01 )
+            // found an mpeg start code
+            return offset - 2;
+
+        last2 = ( last2 << 8 ) | dat[offset];
+    }
 
-    AVCodec *codec = avcodec_find_decoder( codec_id );
+    return -1;
+}
+
+static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
+{
+    if ( in->size - offset < 4 )
+        // not enough room for an mpeg start code
+        return -1;
+
+    for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
+    {
+        if ( in->data[offset+3] == hdr_type )
+            // found it
+            break;
+    }
+    return offset;
+}
+
+static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
+{
+    hb_work_private_t *pv = w->private_data;
 
     // we can't call the avstream funcs but the read_header func in the
     // AVInputFormat may set up some state in the AVContext. In particular 
     // vc1t_read_header allocates 'extradata' to deal with header issues
     // related to Microsoft's bizarre engineering notions. We alloc a chunk
     // of space to make vc1 work then associate the codec with the context.
-    pv->context->extradata_size = 32;
-    pv->context->extradata = av_malloc(pv->context->extradata_size);
-    avcodec_open( pv->context, codec );
+    if ( w->codec_param != CODEC_ID_VC1 )
+    {
+        // we haven't been inflicted with M$ - allocate a little space as
+        // a marker and return success.
+        pv->context->extradata_size = 16;
+        pv->context->extradata = av_malloc(pv->context->extradata_size);
+        return 0;
+    }
 
+    // find the start and end of the sequence header
+    int shdr, shdr_end;
+    if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
+    {
+        // didn't find start of seq hdr
+        return 1;
+    }
+    if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
+    {
+        shdr_end = in->size;
+    }
+    shdr_end -= shdr;
+
+    // find the start and end of the entry point header
+    int ehdr, ehdr_end;
+    if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
+    {
+        // didn't find start of entry point hdr
+        return 1;
+    }
+    if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
+    {
+        ehdr_end = in->size;
+    }
+    ehdr_end -= ehdr;
+
+    // found both headers - allocate an extradata big enough to hold both
+    // then copy them into it.
+    pv->context->extradata_size = shdr_end + ehdr_end;
+    pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
+    memcpy( pv->context->extradata, in->data + shdr, shdr_end );
+    memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
+    memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
     return 0;
 }
 
@@ -424,7 +891,7 @@ static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 {
     hb_work_private_t *pv = w->private_data;
     hb_buffer_t *in = *buf_in;
-    int64_t pts = -1;
+    int64_t pts = AV_NOPTS_VALUE;
     int64_t dts = pts;
 
     *buf_in = NULL;
@@ -432,13 +899,33 @@ static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
     /* if we got an empty buffer signaling end-of-stream send it downstream */
     if ( in->size == 0 )
     {
-        decodeVideo( pv, in->data, in->size, pts, dts );
+        decodeVideo( pv, in->data, in->size, in->sequence, pts, dts );
         hb_list_add( pv->list, in );
         *buf_out = link_buf_list( pv );
-        hb_log( "%s done: %d frames", pv->context->codec->name, pv->nframes );
         return HB_WORK_DONE;
     }
 
+    // if this is the first frame open the codec (we have to wait for the
+    // first frame because of M$ VC1 braindamage).
+    if ( pv->context->extradata_size == 0 )
+    {
+        if ( setup_extradata( w, in ) )
+        {
+            // we didn't find the headers needed to set up extradata.
+            // the codec will abort if we open it so just free the buf
+            // and hope we eventually get the info we need.
+            hb_buffer_close( &in );
+            return HB_WORK_OK;
+        }
+        AVCodec *codec = avcodec_find_decoder( w->codec_param );
+        // There's a mis-feature in ffmpeg that causes the context to be 
+        // incorrectly initialized the 1st time avcodec_open is called.
+        // If you close it and open a 2nd time, it finishes the job.
+        hb_avcodec_open( pv->context, codec );
+        hb_avcodec_close( pv->context );
+        hb_avcodec_open( pv->context, codec );
+    }
+
     if( in->start >= 0 )
     {
         pts = in->start;
@@ -449,7 +936,7 @@ static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
         pv->new_chap = in->new_chap;
         pv->chap_time = pts >= 0? pts : pv->pts_next;
     }
-    decodeVideo( pv, in->data, in->size, pts, dts );
+    decodeVideo( pv, in->data, in->size, in->sequence, pts, dts );
     hb_buffer_close( &in );
     *buf_out = link_buf_list( pv );
     return HB_WORK_OK;
@@ -473,23 +960,38 @@ static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
         info->rate = 27000000;
         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
                           context->time_base.den;
+        if ( context->ticks_per_frame > 1 )
+        {
+            // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
+            // field rate rather than frame rate so convert back to frames.
+            info->rate_base *= context->ticks_per_frame;
+        }
         
-        /* Sometimes there's no pixel aspect set in the source. In that case,
-           assume a 1:1 PAR. Otherwise, preserve the source PAR.             */
-        info->pixel_aspect_width = context->sample_aspect_ratio.num ?
-                                        context->sample_aspect_ratio.num : 1;
-        info->pixel_aspect_height = context->sample_aspect_ratio.den ?
-                                        context->sample_aspect_ratio.den : 1;
-
+        info->pixel_aspect_width = context->sample_aspect_ratio.num;
+        info->pixel_aspect_height = context->sample_aspect_ratio.den;
+
+        /* Sometimes there's no pixel aspect set in the source ffmpeg context
+         * which appears to come from the video stream. In that case,
+         * try the pixel aspect in AVStream (which appears to come from
+         * the container). Else assume a 1:1 PAR. */
+        if ( info->pixel_aspect_width == 0 ||
+             info->pixel_aspect_height == 0 )
+        {
+            // There will not be an ffmpeg stream if the file is TS
+            AVStream *st = hb_ffmpeg_avstream( w->codec_param );
+            info->pixel_aspect_width = st && st->sample_aspect_ratio.num ?
+                                       st->sample_aspect_ratio.num : 1;
+            info->pixel_aspect_height = st && st->sample_aspect_ratio.den ?
+                                        st->sample_aspect_ratio.den : 1;
+        }
         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
          * Display Aspect Ratio so we convert by scaling by the Storage
          * Aspect Ratio (w/h). We do the calc in floating point to get the
-         * rounding right. We round in the second decimal digit because we
-         * scale the (integer) aspect by 9 to preserve the 1st digit.  */
-        info->aspect = ( (double)info->pixel_aspect_width * 
-                         (double)context->width /
-                         (double)info->pixel_aspect_height /
-                         (double)context->height + 0.05 ) * HB_ASPECT_BASE;
+         * rounding right. */
+        info->aspect = (double)info->pixel_aspect_width * 
+                       (double)context->width /
+                       (double)info->pixel_aspect_height /
+                       (double)context->height;
 
         info->profile = context->profile;
         info->level = context->level;
@@ -539,22 +1041,65 @@ static void init_ffmpeg_context( hb_work_object_t *w )
     if ( ! pv->context->codec )
     {
         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
-        avcodec_open( pv->context, codec );
+        hb_avcodec_open( pv->context, codec );
     }
     // set up our best guess at the frame duration.
-    // the frame rate in the codec seems to be bogus but it's ok in the stream.
+    // the frame rate in the codec is usually bogus but it's sometimes
+    // ok in the stream.
     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
-    AVRational tb = st->time_base;
-    if ( st->r_frame_rate.den && st->r_frame_rate.num )
+
+    if ( st->nb_frames && st->duration )
+    {
+        // compute the average frame duration from the total number
+        // of frames & the total duration.
+        pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
+                       ( (double)st->nb_frames * (double)st->time_base.den );
+    }
+    else
     {
-        tb.num = st->r_frame_rate.den;
-        tb.den = st->r_frame_rate.num;
+        // XXX We don't have a frame count or duration so try to use the
+        // far less reliable time base info in the stream.
+        // Because the time bases are so screwed up, we only take values
+        // in the range 8fps - 64fps.
+        AVRational tb;
+        if ( st->avg_frame_rate.den * 64 > st->avg_frame_rate.num &&
+             st->avg_frame_rate.num > st->avg_frame_rate.den * 8 )
+        {
+            tb.num = st->avg_frame_rate.den;
+            tb.den = st->avg_frame_rate.num;
+        }
+        else if ( st->time_base.num * 64 > st->time_base.den &&
+                  st->time_base.den > st->time_base.num * 8 )
+        {
+            tb = st->time_base;
+        }
+        else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
+                  st->r_frame_rate.num > st->r_frame_rate.den * 8 )
+        {
+            tb.num = st->r_frame_rate.den;
+            tb.den = st->r_frame_rate.num;
+        }
+        else
+        {
+            tb.num = 1001;  /*XXX*/
+            tb.den = 24000; /*XXX*/
+        }
+        pv->duration =  (double)tb.num / (double)tb.den;
     }
-    pv->duration = 90000. * tb.num / tb.den;
+    pv->duration *= 90000.;
 
     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
     pv->context->opaque = pv;
     pv->context->get_buffer = get_frame_buf;
+    pv->context->reget_buffer = reget_frame_buf;
+
+    // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
+    // b-frames abortion that messes up frame ordering and timestamps.
+    // XXX ffmpeg knows which streams are broken but doesn't expose the
+    //     info externally. We should patch ffmpeg to add a flag to the
+    //     codec context for this but until then we mark all ffmpeg streams
+    //     as suspicious.
+    pv->brokenByMicrosoft = 1;
 }
 
 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
@@ -581,6 +1126,17 @@ static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
     w->private_data = pv;
     pv->job   = job;
     pv->list = hb_list_init();
+    pv->pts_next = -1;
+    pv->pts = -1;
+
+    if ( w->audio != NULL &&
+         hb_need_downmix( w->audio->config.in.channel_layout, 
+                          w->audio->config.out.mixdown) )
+    {
+        pv->downmix = hb_downmix_init(w->audio->config.in.channel_layout, 
+                                      w->audio->config.out.mixdown);
+        hb_downmix_set_chan_map( pv->downmix, &hb_smpte_chan_map, &hb_qt_chan_map );
+    }
 
     return 0;
 }
@@ -589,82 +1145,36 @@ static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
                              hb_buffer_t ** buf_out )
 {
     hb_work_private_t *pv = w->private_data;
-    if ( ! pv->context )
-    {
-        init_ffmpeg_context( w );
-
-        switch ( pv->context->codec_id )
-        {
-            // These are the only formats whose timestamps we'll believe.
-            // All others are treated as CFR (i.e., we take the first timestamp
-            // then generate all the others from the frame rate). The reason for
-            // this is that the M$ encoders are so frigging buggy with garbage
-            // like packed b-frames (vfw divx mpeg4) that believing their timestamps
-            // results in discarding more than half the video frames because they'll
-            // be out of sequence (and attempting to reseqence them doesn't work
-            // because it's the timestamps that are wrong, not the decoded frame
-            // order). All hail Redmond, ancestral home of the rich & stupid.
-            case CODEC_ID_MPEG2VIDEO:
-            case CODEC_ID_RAWVIDEO:
-            case CODEC_ID_H264:
-            case CODEC_ID_VC1:
-                break;
-
-            default:
-                pv->ignore_pts = 1;
-                break;
-        }
-    }
     hb_buffer_t *in = *buf_in;
-    int64_t pts = -1;
-
     *buf_in = NULL;
 
     /* if we got an empty buffer signaling end-of-stream send it downstream */
     if ( in->size == 0 )
     {
         /* flush any frames left in the decoder */
-        while ( decodeFrame( pv, NULL, 0 ) )
+        while ( pv->context && decodeFrame( pv, NULL, 0, in->sequence ) )
         {
         }
+        flushDelayQueue( pv );
         hb_list_add( pv->list, in );
         *buf_out = link_buf_list( pv );
-        hb_log( "%s done: %d frames %d drops", pv->context->codec->name,
-                pv->nframes, pv->ndrops );
         return HB_WORK_DONE;
     }
 
-    if( in->start >= 0 )
+    if ( ! pv->context )
     {
-        // use the first timestamp as our 'next expected' pts
-        if ( pv->pts_next <= 0 )
-        {
-            pv->pts_next = in->start;
-        }
+        init_ffmpeg_context( w );
+    }
 
-        if ( ! pv->ignore_pts )
+    int64_t pts = in->start;
+    if( pts >= 0 )
+    {
+        // use the first timestamp as our 'next expected' pts
+        if ( pv->pts_next < 0 )
         {
-            pts = in->start;
-            if ( pv->pts > 0 )
-            {
-                hb_log( "overwriting pts %lld with %lld (diff %d)",
-                        pv->pts, pts, pts - pv->pts );
-            }
-            if ( pv->pts_next - pts >= pv->duration )
-            {
-                // this frame starts more than a frame time before where
-                // the nominal frame rate says it should - drop it.
-                // log the first 10 drops so we'll know what's going on.
-                if ( pv->ndrops++ < 10 )
-                {
-                    hb_log( "time reversal next %.0f pts %lld (diff %g)",
-                            pv->pts_next, pts, pv->pts_next - pts );
-                }
-                hb_buffer_close( &in );
-                return HB_WORK_OK;
-            }
-            pv->pts = pts;
+            pv->pts_next = pts;
         }
+        pv->pts = pts;
     }
 
     if ( in->new_chap )
@@ -673,7 +1183,7 @@ static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
         pv->chap_time = pts >= 0? pts : pv->pts_next;
     }
     prepare_ffmpeg_buffer( in );
-    decodeFrame( pv, in->data, in->size );
+    decodeFrame( pv, in->data, in->size, in->sequence );
     hb_buffer_close( &in );
     *buf_out = link_buf_list( pv );
     return HB_WORK_OK;
@@ -683,62 +1193,139 @@ static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
 {
     if ( decavcodecvInfo( w, info ) )
     {
-        // the frame rate in the codec seems to be bogus but it's ok in the stream.
-        AVStream *st = hb_ffmpeg_avstream( w->codec_param );
-        AVRational tb;
-        if ( st->r_frame_rate.den && st->r_frame_rate.num )
+        hb_work_private_t *pv = w->private_data;
+        if ( ! pv->context )
         {
-            tb.num = st->r_frame_rate.den;
-            tb.den = st->r_frame_rate.num;
+            init_ffmpeg_context( w );
         }
-        else
-        {
-            tb = st->time_base;
-        }
-
-        // ffmpeg gives the frame rate in frames per second while HB wants
-        // it in units of the 27MHz MPEG clock. */
+        // we have the frame duration in units of the 90KHz pts clock but
+        // need it in units of the 27MHz MPEG clock (27MHz / 90KHz = 300).
         info->rate = 27000000;
-        info->rate_base = (int64_t)tb.num * 27000000LL / tb.den;
+        info->rate_base = pv->duration * 300.;
         return 1;
     }
     return 0;
 }
 
-static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
+static void decodeAudio( hb_audio_t * audio, hb_work_private_t *pv, uint8_t *data, int size )
 {
     AVCodecContext *context = pv->context;
     int pos = 0;
+    int loop_limit = 256;
 
     while ( pos < size )
     {
-        int16_t buffer[AVCODEC_MAX_AUDIO_FRAME_SIZE];
-        int out_size = sizeof(buffer);
-        int len = avcodec_decode_audio2( context, buffer, &out_size,
-                                         data + pos, size - pos );
-        if ( len <= 0 )
+        int16_t *buffer = pv->buffer;
+        if ( buffer == NULL )
+        {
+            pv->buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
+            buffer = pv->buffer;
+        }
+
+        AVPacket avp;
+        av_init_packet( &avp );
+        avp.data = data + pos;
+        avp.size = size - pos;
+
+        int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
+        int nsamples;
+        int len = avcodec_decode_audio3( context, buffer, &out_size, &avp );
+        if ( len < 0 )
         {
             return;
         }
+        if ( len == 0 )
+        {
+            if ( !(loop_limit--) )
+                return;
+        }
+        else
+            loop_limit = 256;
+
         pos += len;
         if( out_size > 0 )
         {
-            hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
+            // We require signed 16-bit ints for the output format. If
+            // we got something different convert it.
+            if ( context->sample_fmt != SAMPLE_FMT_S16 )
+            {
+                // Note: av_audio_convert seems to be a work-in-progress but
+                //       looks like it will eventually handle general audio
+                //       mixdowns which would allow us much more flexibility
+                //       in handling multichannel audio in HB. If we were doing
+                //       anything more complicated than a one-for-one format
+                //       conversion we'd probably want to cache the converter
+                //       context in the pv.
+                int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
+                AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
+                                                              context->sample_fmt, 1,
+                                                              NULL, 0 );
+                // get output buffer size (in 2-byte samples) then malloc a buffer
+                nsamples = out_size / isamp;
+                buffer = av_malloc( nsamples * 2 );
+
+                // we're doing straight sample format conversion which behaves as if
+                // there were only one channel.
+                const void * const ibuf[6] = { pv->buffer };
+                void * const obuf[6] = { buffer };
+                const int istride[6] = { isamp };
+                const int ostride[6] = { 2 };
+
+                av_audio_convert( ctx, obuf, ostride, ibuf, istride, nsamples );
+                av_audio_convert_free( ctx );
+            }
+            else
+            {
+                nsamples = out_size / 2;
+            }
+
+            hb_buffer_t * buf;
+
+            if ( pv->downmix )
+            {
+                pv->downmix_buffer = realloc(pv->downmix_buffer, nsamples * sizeof(hb_sample_t));
+                
+                int i;
+                for( i = 0; i < nsamples; ++i )
+                {
+                    pv->downmix_buffer[i] = buffer[i];
+                }
+
+                int n_ch_samples = nsamples / context->channels;
+                int channels = HB_AMIXDOWN_GET_DISCRETE_CHANNEL_COUNT(audio->config.out.mixdown);
+
+                buf = hb_buffer_init( n_ch_samples * channels * sizeof(float) );
+                hb_sample_t *samples = (hb_sample_t *)buf->data;
+                hb_downmix(pv->downmix, samples, pv->downmix_buffer, n_ch_samples);
+            }
+            else
+            {
+                buf = hb_buffer_init( nsamples * sizeof(float) );
+                float *fl32 = (float *)buf->data;
+                int i;
+                for( i = 0; i < nsamples; ++i )
+                {
+                    fl32[i] = buffer[i];
+                }
+                int n_ch_samples = nsamples / context->channels;
+                hb_layout_remap( &hb_smpte_chan_map, &hb_qt_chan_map,
+                                 audio->config.in.channel_layout, 
+                                 fl32, n_ch_samples );
+            }
 
             double pts = pv->pts_next;
             buf->start = pts;
-            out_size >>= 1;
-            pts += out_size * pv->duration;
+            pts += nsamples * pv->duration;
             buf->stop  = pts;
             pv->pts_next = pts;
 
-            float *fl32 = (float *)buf->data;
-            int i;
-            for( i = 0; i < out_size; ++i )
+            hb_list_add( pv->list, buf );
+
+            // if we allocated a buffer for sample format conversion, free it
+            if ( buffer != pv->buffer )
             {
-                fl32[i] = buffer[i];
+                av_free( buffer );
             }
-            hb_list_add( pv->list, buf );
         }
     }
 }
@@ -746,22 +1333,45 @@ static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
                     hb_buffer_t **buf_out )
 {
+    if ( (*buf_in)->size <= 0 )
+    {
+        /* EOF on input stream - send it downstream & say that we're done */
+        *buf_out = *buf_in;
+        *buf_in = NULL;
+        return HB_WORK_DONE;
+    }
+
     hb_work_private_t *pv = w->private_data;
+
+    if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
+    {
+        // discard buffers that start before video time 0
+        *buf_out = NULL;
+        return HB_WORK_OK;
+    }
+
     if ( ! pv->context )
     {
         init_ffmpeg_context( w );
+        // duration is a scaling factor to go from the number of samples
+        // in the decoded frame to frame time (in 90KHz mpeg ticks).
+        // 'channels' converts total samples to per-channel samples,
+        // 'sample_rate' converts per-channel samples to seconds and
+        // the 90000 is mpeg ticks per second.
         pv->duration = 90000. /
                     (double)( pv->context->sample_rate * pv->context->channels );
     }
     hb_buffer_t *in = *buf_in;
 
+    // if the packet has a timestamp use it if we don't have a timestamp yet
+    // or if there's been a timing discontinuity of more than 100ms.
     if ( in->start >= 0 &&
          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
     {
         pv->pts_next = in->start;
     }
     prepare_ffmpeg_buffer( in );
-    decodeAudio( pv, in->data, in->size );
+    decodeAudio( w->audio, pv, in->data, in->size );
     *buf_out = link_buf_list( pv );
 
     return HB_WORK_OK;