libhb/decavcodec.c

   1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 /* This module is Handbrake's interface to the ffmpeg decoder library
   8    (libavcodec & small parts of libavformat). It contains four Handbrake
   9    "work objects":
  10
  11     decavcodec  connects HB to an ffmpeg audio decoder
  12     decavcodecv connects HB to an ffmpeg video decoder
  13
  14         (Two different routines are needed because the ffmpeg library
  15         has different decoder calling conventions for audio & video.
  16         The audio decoder should have had its name changed to "decavcodeca"
  17         but I got lazy.) These work objects are self-contained & follow all
  18         of HB's conventions for a decoder module. They can be used like
  19         any other HB decoder (deca52, decmpeg2, etc.).
  20
  21     decavcodecai "internal" (incestuous?) version of decavcodec
  22     decavcodecvi "internal" (incestuous?) version of decavcodecv
  23
  24         These routines are functionally equivalent to the routines above but
  25         can only be used by the ffmpeg-based stream reader in libhb/stream.c.
  26         The reason they exist is because the ffmpeg library leaves some of
  27         the information needed by the decoder in the AVStream (the data
  28         structure used by the stream reader) and we need to retrieve it
  29         to successfully decode frames. But in HB the reader and decoder
  30         modules are in completely separate threads and nothing goes between
  31         them but hb_buffers containing frames to be decoded. I.e., there's
  32         no easy way for the ffmpeg stream reader to pass a pointer to its
  33         AVStream over to the ffmpeg video or audio decoder. So the *i work
  34         objects use a private back door to the stream reader to get access
  35         to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
  36         and the codec_param passed to these work objects is the key to this
  37         back door (it's basically an index that allows the correct AVStream
  38         to be retrieved).
  39
  40     The normal & *i objects share a lot of code (the basic frame decoding
  41     and bitstream info code is factored out into subroutines that can be
  42     called by either) but the top level routines of the *i objects
  43     (decavcodecviWork, decavcodecviInfo, etc.) are different because:
  44      1) they *have* to use the AVCodecContext that's contained in the
  45         reader's AVStream rather than just allocating & using their own,
  46      2) the Info routines have access to stuff kept in the AVStream in addition
  47         to stuff kept in the AVCodecContext. This shouldn't be necessary but
  48         crucial information like video frame rate that should be in the
  49         AVCodecContext is either missing or wrong in the version of ffmpeg
  50         we're currently using.
  51
  52     A consequence of the above is that the non-i work objects *can't* use
  53     information from the AVStream because there isn't one - they get their
  54     data from either the dvd reader or the mpeg reader, not the ffmpeg stream
  55     reader. That means that they have to make up for deficiencies in the
  56     AVCodecContext info by using stuff kept in the HB "title" struct. It
  57     also means that ffmpeg codecs that randomly scatter state needed by
  58     the decoder across both the AVCodecContext & the AVStream (e.g., the
  59     VC1 decoder) can't easily be used by the HB mpeg stream reader.
  60  */
  61
  62 #include "hb.h"
  63 #include "hbffmpeg.h"
  64 #include "libavcodec/audioconvert.h"
  65
  66 static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
  67 static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  68 static void decavcodecClose( hb_work_object_t * );
  69 static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
  70 static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );
  71
  72 hb_work_object_t hb_decavcodec =
  73 {
  74     WORK_DECAVCODEC,
  75     "MPGA decoder (libavcodec)",
  76     decavcodecInit,
  77     decavcodecWork,
  78     decavcodecClose,
  79     decavcodecInfo,
  80     decavcodecBSInfo
  81 };
  82
  83 #define HEAP_SIZE 8
  84 typedef struct {
  85     // there are nheap items on the heap indexed 1..nheap (i.e., top of
  86     // heap is 1). The 0th slot is unused - a marker is put there to check
  87     // for overwrite errs.
  88     int64_t h[HEAP_SIZE+1];
  89     int     nheap;
  90 } pts_heap_t;
  91
  92 struct hb_work_private_s
  93 {
  94     hb_job_t        *job;
  95     AVCodecContext  *context;
  96     AVCodecParserContext *parser;
  97     hb_list_t       *list;
  98     double          duration;   // frame duration (for video)
  99     double          pts_next;   // next pts we expect to generate
 100     int64_t         pts;        // (video) pts passing from parser to decoder
 101     int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
 102     int             new_chap;   // output chapter mark pending
 103     uint32_t        nframes;
 104     uint32_t        ndrops;
 105     uint32_t        decode_errors;
 106     int             brokenByMicrosoft; // video stream may contain packed b-frames
 107     hb_buffer_t*    delayq[HEAP_SIZE];
 108     pts_heap_t      pts_heap;
 109     void*           buffer;
 110     struct SwsContext *sws_context; // if we have to rescale or convert color space
 111 };
 112
 113 static int64_t heap_pop( pts_heap_t *heap )
 114 {
 115     int64_t result;
 116
 117     if ( heap->nheap <= 0 )
 118     {
 119         return -1;
 120     }
 121
 122     // return the top of the heap then put the bottom element on top,
 123     // decrease the heap size by one & rebalence the heap.
 124     result = heap->h[1];
 125
 126     int64_t v = heap->h[heap->nheap--];
 127     int parent = 1;
 128     int child = parent << 1;
 129     while ( child <= heap->nheap )
 130     {
 131         // find the smallest of the two children of parent
 132         if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
 133             ++child;
 134
 135         if (v <= heap->h[child])
 136             // new item is smaller than either child so it's the new parent.
 137             break;
 138
 139         // smallest child is smaller than new item so move it up then
 140         // check its children.
 141         int64_t hp = heap->h[child];
 142         heap->h[parent] = hp;
 143         parent = child;
 144         child = parent << 1;
 145     }
 146     heap->h[parent] = v;
 147     return result;
 148 }
 149
 150 static void heap_push( pts_heap_t *heap, int64_t v )
 151 {
 152     if ( heap->nheap < HEAP_SIZE )
 153     {
 154         ++heap->nheap;
 155     }
 156
 157     // stick the new value on the bottom of the heap then bubble it
 158     // up to its correct spot.
 159         int child = heap->nheap;
 160         while (child > 1) {
 161                 int parent = child >> 1;
 162                 if (heap->h[parent] <= v)
 163                         break;
 164                 // move parent down
 165                 int64_t hp = heap->h[parent];
 166                 heap->h[child] = hp;
 167                 child = parent;
 168         }
 169         heap->h[child] = v;
 170 }
 171
 172
 173 /***********************************************************************
 174  * hb_work_decavcodec_init
 175  ***********************************************************************
 176  *
 177  **********************************************************************/
 178 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 179 {
 180     AVCodec * codec;
 181
 182     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 183     w->private_data = pv;
 184
 185     pv->job   = job;
 186
 187     int codec_id = w->codec_param;
 188     /*XXX*/
 189     if ( codec_id == 0 )
 190         codec_id = CODEC_ID_MP2;
 191
 192     codec = avcodec_find_decoder( codec_id );
 193     pv->parser = av_parser_init( codec_id );
 194
 195     pv->context = avcodec_alloc_context();
 196     hb_avcodec_open( pv->context, codec );
 197
 198     return 0;
 199 }
 200
 201 /***********************************************************************
 202  * Close
 203  ***********************************************************************
 204  *
 205  **********************************************************************/
 206 static void decavcodecClose( hb_work_object_t * w )
 207 {
 208     hb_work_private_t * pv = w->private_data;
 209
 210     if ( pv )
 211     {
 212         if ( pv->job && pv->context && pv->context->codec )
 213         {
 214             hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
 215                     pv->context->codec->name, pv->nframes, pv->decode_errors,
 216                     pv->ndrops );
 217         }
 218         if ( pv->sws_context )
 219         {
 220             sws_freeContext( pv->sws_context );
 221         }
 222         if ( pv->parser )
 223         {
 224             av_parser_close(pv->parser);
 225         }
 226         if ( pv->context && pv->context->codec )
 227         {
 228             hb_avcodec_close( pv->context );
 229         }
 230         if ( pv->list )
 231         {
 232             hb_list_close( &pv->list );
 233         }
 234         if ( pv->buffer )
 235         {
 236             free( pv->buffer );
 237             pv->buffer = NULL;
 238         }
 239         free( pv );
 240         w->private_data = NULL;
 241     }
 242 }
 243
 244 /***********************************************************************
 245  * Work
 246  ***********************************************************************
 247  *
 248  **********************************************************************/
 249 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 250                     hb_buffer_t ** buf_out )
 251 {
 252     hb_work_private_t * pv = w->private_data;
 253     hb_buffer_t * in = *buf_in, * buf, * last = NULL;
 254     int   pos, len, out_size, i, uncompressed_len;
 255     short buffer[AVCODEC_MAX_AUDIO_FRAME_SIZE];
 256     uint64_t cur;
 257     unsigned char *parser_output_buffer;
 258     int parser_output_buffer_len;
 259
 260     if ( (*buf_in)->size <= 0 )
 261     {
 262         /* EOF on input stream - send it downstream & say that we're done */
 263         *buf_out = *buf_in;
 264         *buf_in = NULL;
 265         return HB_WORK_DONE;
 266     }
 267
 268     *buf_out = NULL;
 269
 270     if ( in->start < -1 && pv->pts_next <= 0 )
 271     {
 272         // discard buffers that start before video time 0
 273         return HB_WORK_OK;
 274     }
 275
 276     cur = ( in->start < 0 )? pv->pts_next : in->start;
 277
 278     pos = 0;
 279     while( pos < in->size )
 280     {
 281         len = av_parser_parse( pv->parser, pv->context,
 282                                &parser_output_buffer, &parser_output_buffer_len,
 283                                in->data + pos, in->size - pos, cur, cur );
 284         out_size = 0;
 285         uncompressed_len = 0;
 286         if (parser_output_buffer_len)
 287         {
 288             out_size = sizeof(buffer);
 289             uncompressed_len = avcodec_decode_audio2( pv->context, buffer,
 290                                                       &out_size,
 291                                                       parser_output_buffer,
 292                                                       parser_output_buffer_len );
 293         }
 294         if( out_size )
 295         {
 296             short * s16;
 297             float * fl32;
 298
 299             buf = hb_buffer_init( 2 * out_size );
 300
 301             int sample_size_in_bytes = 2;   // Default to 2 bytes
 302             switch (pv->context->sample_fmt)
 303             {
 304               case SAMPLE_FMT_S16:
 305                 sample_size_in_bytes = 2;
 306                 break;
 307               /* We should handle other formats here - but that needs additional format conversion work below */
 308               /* For now we'll just report the error and try to carry on */
 309               default:
 310                 hb_log("decavcodecWork - Unknown Sample Format from avcodec_decode_audio (%d) !", pv->context->sample_fmt);
 311                 break;
 312             }
 313
 314             buf->start = cur;
 315             buf->stop  = cur + 90000 * ( out_size / (sample_size_in_bytes * pv->context->channels) ) /
 316                          pv->context->sample_rate;
 317             cur = buf->stop;
 318
 319             s16  = buffer;
 320             fl32 = (float *) buf->data;
 321             for( i = 0; i < out_size / 2; i++ )
 322             {
 323                 fl32[i] = s16[i];
 324             }
 325
 326             if( last )
 327             {
 328                 last = last->next = buf;
 329             }
 330             else
 331             {
 332                 *buf_out = last = buf;
 333             }
 334         }
 335
 336         pos += len;
 337     }
 338
 339     pv->pts_next = cur;
 340
 341     return HB_WORK_OK;
 342 }
 343
 344 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
 345 {
 346     hb_work_private_t *pv = w->private_data;
 347
 348     memset( info, 0, sizeof(*info) );
 349
 350     if ( pv && pv->context )
 351     {
 352         AVCodecContext *context = pv->context;
 353         info->bitrate = context->bit_rate;
 354         info->rate = context->time_base.num;
 355         info->rate_base = context->time_base.den;
 356         info->profile = context->profile;
 357         info->level = context->level;
 358         return 1;
 359     }
 360     return 0;
 361 }
 362
 363 static const int chan2layout[] = {
 364     HB_INPUT_CH_LAYOUT_MONO,  // We should allow no audio really.
 365     HB_INPUT_CH_LAYOUT_MONO,
 366     HB_INPUT_CH_LAYOUT_STEREO,
 367     HB_INPUT_CH_LAYOUT_2F1R,
 368     HB_INPUT_CH_LAYOUT_2F2R,
 369     HB_INPUT_CH_LAYOUT_3F2R,
 370     HB_INPUT_CH_LAYOUT_4F2R,
 371     HB_INPUT_CH_LAYOUT_STEREO,
 372     HB_INPUT_CH_LAYOUT_STEREO,
 373 };
 374
 375 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 376                              hb_work_info_t *info )
 377 {
 378     hb_work_private_t *pv = w->private_data;
 379     int ret = 0;
 380
 381     memset( info, 0, sizeof(*info) );
 382
 383     if ( pv && pv->context )
 384     {
 385         return decavcodecInfo( w, info );
 386     }
 387     // XXX
 388     // We should parse the bitstream to find its parameters but for right
 389     // now we just return dummy values if there's a codec that will handle it.
 390     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
 391                                                            CODEC_ID_MP2 );
 392     if ( ! codec )
 393     {
 394         // there's no ffmpeg codec for this audio type - give up
 395         return -1;
 396     }
 397
 398     static char codec_name[64];
 399     info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
 400
 401     AVCodecParserContext *parser = av_parser_init( codec->id );
 402     AVCodecContext *context = avcodec_alloc_context();
 403     hb_avcodec_open( context, codec );
 404 #if defined( SYS_CYGWIN )
 405     uint8_t *buffer = memalign(16, AVCODEC_MAX_AUDIO_FRAME_SIZE);
 406 #else
 407     uint8_t *buffer = malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 408 #endif
 409     int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 410     unsigned char *pbuffer;
 411     int pos = 0, pbuffer_size;
 412
 413     while ( pos < buf->size )
 414     {
 415         int len = av_parser_parse( parser, context, &pbuffer, &pbuffer_size,
 416                                    buf->data + pos, buf->size - pos,
 417                                    buf->start, buf->start );
 418         pos += len;
 419         if ( pbuffer_size > 0 )
 420         {
 421             len = avcodec_decode_audio2( context, (int16_t*)buffer, &out_size,
 422                                          pbuffer, pbuffer_size );
 423             if ( len > 0 && context->sample_rate > 0 )
 424             {
 425                 info->bitrate = context->bit_rate;
 426                 info->rate = context->sample_rate;
 427                 info->rate_base = 1;
 428                 info->channel_layout = chan2layout[context->channels & 7];
 429                 ret = 1;
 430                 break;
 431             }
 432         }
 433     }
 434     free( buffer );
 435     av_parser_close( parser );
 436     hb_avcodec_close( context );
 437     return ret;
 438 }
 439
 440 /* -------------------------------------------------------------
 441  * General purpose video decoder using libavcodec
 442  */
 443
 444 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
 445                             int h )
 446 {
 447     if ( dstride == sstride )
 448     {
 449         memcpy( dst, src, dstride * h );
 450         return dst + dstride * h;
 451     }
 452     int lbytes = dstride <= sstride? dstride : sstride;
 453     while ( --h >= 0 )
 454     {
 455         memcpy( dst, src, lbytes );
 456         src += sstride;
 457         dst += dstride;
 458     }
 459     return dst;
 460 }
 461
 462 // copy one video frame into an HB buf. If the frame isn't in our color space
 463 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
 464 // Otherwise just copy the bits.
 465 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 466 {
 467     AVCodecContext *context = pv->context;
 468     int w, h;
 469     if ( ! pv->job )
 470     {
 471         // if the dimensions are odd, drop the lsb since h264 requires that
 472         // both width and height be even.
 473         w = ( context->width >> 1 ) << 1;
 474         h = ( context->height >> 1 ) << 1;
 475     }
 476     else
 477     {
 478         w =  pv->job->title->width;
 479         h =  pv->job->title->height;
 480     }
 481     hb_buffer_t *buf = hb_video_buffer_init( w, h );
 482     uint8_t *dst = buf->data;
 483
 484     if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
 485          h != context->height )
 486     {
 487         // have to convert to our internal color space and/or rescale
 488         AVPicture dstpic;
 489         avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 490
 491         if ( ! pv->sws_context )
 492         {
 493             pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
 494                                               w, h, PIX_FMT_YUV420P,
 495                                               SWS_LANCZOS|SWS_ACCURATE_RND,
 496                                               NULL, NULL, NULL );
 497         }
 498         sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
 499                    dstpic.data, dstpic.linesize );
 500     }
 501     else
 502     {
 503         dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
 504         w = (w + 1) >> 1; h = (h + 1) >> 1;
 505         dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
 506         dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
 507     }
 508     return buf;
 509 }
 510
 511 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
 512 {
 513     hb_work_private_t *pv = context->opaque;
 514     frame->pts = pv->pts;
 515     pv->pts = -1;
 516     return avcodec_default_get_buffer( context, frame );
 517 }
 518
 519 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 520 {
 521     hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
 522     if ( c && c->title )
 523     {
 524         hb_log( "%s: \"%s\" (%d) at frame %u time %lld",
 525                 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
 526     }
 527     else
 528     {
 529         hb_log( "%s: Chapter %d at frame %u time %lld",
 530                 pv->context->codec->name, chap_num, pv->nframes, pts );
 531     }
 532 }
 533
 534 static void flushDelayQueue( hb_work_private_t *pv )
 535 {
 536     hb_buffer_t *buf;
 537     int slot = pv->nframes & (HEAP_SIZE-1);
 538
 539     // flush all the video packets left on our timestamp-reordering delay q
 540     while ( ( buf = pv->delayq[slot] ) != NULL )
 541     {
 542         buf->start = heap_pop( &pv->pts_heap );
 543         hb_list_add( pv->list, buf );
 544         pv->delayq[slot] = NULL;
 545         slot = ( slot + 1 ) & (HEAP_SIZE-1);
 546     }
 547 }
 548
 549 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
 550 {
 551     int got_picture, oldlevel = 0;
 552     AVFrame frame;
 553
 554     if ( global_verbosity_level <= 1 )
 555     {
 556         oldlevel = av_log_get_level();
 557         av_log_set_level( AV_LOG_QUIET );
 558     }
 559     if ( avcodec_decode_video( pv->context, &frame, &got_picture, data, size ) < 0 )
 560     {
 561         ++pv->decode_errors;
 562     }
 563     if ( global_verbosity_level <= 1 )
 564     {
 565         av_log_set_level( oldlevel );
 566     }
 567     if( got_picture )
 568     {
 569         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
 570         // packet had a pts we handed it to av_parser_parse (if the packet had
 571         // no pts we set it to -1 but before the parse we can't distinguish between
 572         // the start of a video frame with no pts & an intermediate packet of
 573         // some frame which never has a pts). we hope that when parse returns
 574         // the frame to us the pts we originally handed it will be in parser->pts.
 575         // we put this pts into pv->pts so that when a avcodec_decode_video
 576         // finally gets around to allocating an AVFrame to hold the decoded
 577         // frame we can stuff that pts into the frame. if all of these relays
 578         // worked at this point frame.pts should hold the frame's pts from the
 579         // original data stream or -1 if it didn't have one. in the latter case
 580         // we generate the next pts in sequence for it.
 581         double frame_dur = pv->duration;
 582         if ( frame_dur <= 0 )
 583         {
 584             frame_dur = 90000. * (double)pv->context->time_base.num /
 585                         (double)pv->context->time_base.den;
 586             pv->duration = frame_dur;
 587         }
 588         if ( frame.repeat_pict )
 589         {
 590             frame_dur += frame.repeat_pict * frame_dur * 0.5;
 591         }
 592         // XXX Unlike every other video decoder, the Raw decoder doesn't
 593         //     use the standard buffer allocation routines so we never
 594         //     get to put a PTS in the frame. Do it now.
 595         if ( pv->context->codec_id == CODEC_ID_RAWVIDEO )
 596         {
 597             frame.pts = pv->pts;
 598             pv->pts = -1;
 599         }
 600         // If there was no pts for this frame, assume constant frame rate
 601         // video & estimate the next frame time from the last & duration.
 602         double pts = frame.pts;
 603         if ( pts < 0 )
 604         {
 605             pts = pv->pts_next;
 606         }
 607         pv->pts_next = pts + frame_dur;
 608
 609         hb_buffer_t *buf;
 610
 611         // if we're doing a scan or this content couldn't have been broken
 612         // by Microsoft we don't worry about timestamp reordering
 613         if ( ! pv->job || ! pv->brokenByMicrosoft )
 614         {
 615             buf = copy_frame( pv, &frame );
 616             buf->start = pts;
 617             hb_list_add( pv->list, buf );
 618             ++pv->nframes;
 619             return got_picture;
 620         }
 621
 622         // XXX This following probably addresses a libavcodec bug but I don't
 623         //     see an easy fix so we workaround it here.
 624         //
 625         // The M$ 'packed B-frames' atrocity results in decoded frames with
 626         // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
 627         // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
 628         // The frames are actually delivered in the right order but with
 629         // the wrong timestamp. To get the correct timestamp attached to
 630         // each frame we have a delay queue (longer than the max number of
 631         // b-frames) & a sorting heap for the timestamps. As each frame
 632         // comes out of the decoder the oldest frame in the queue is removed
 633         // and associated with the smallest timestamp. Then the new frame is
 634         // added to the queue & its timestamp is pushed on the heap.
 635         // This does nothing if the timestamps are correct (i.e., the video
 636         // uses a codec that Micro$oft hasn't broken yet) but the frames
 637         // get timestamped correctly even when M$ has munged them.
 638
 639         // remove the oldest picture from the frame queue (if any) &
 640         // give it the smallest timestamp from our heap. The queue size
 641         // is a power of two so we get the slot of the oldest by masking
 642         // the frame count & this will become the slot of the newest
 643         // once we've removed & processed the oldest.
 644         int slot = pv->nframes & (HEAP_SIZE-1);
 645         if ( ( buf = pv->delayq[slot] ) != NULL )
 646         {
 647             buf->start = heap_pop( &pv->pts_heap );
 648
 649             if ( pv->new_chap && buf->start >= pv->chap_time )
 650             {
 651                 buf->new_chap = pv->new_chap;
 652                 pv->new_chap = 0;
 653                 pv->chap_time = 0;
 654                 log_chapter( pv, buf->new_chap, buf->start );
 655             }
 656             else if ( pv->nframes == 0 )
 657             {
 658                 log_chapter( pv, pv->job->chapter_start, buf->start );
 659             }
 660             hb_list_add( pv->list, buf );
 661         }
 662
 663         // add the new frame to the delayq & push its timestamp on the heap
 664         pv->delayq[slot] = copy_frame( pv, &frame );
 665         heap_push( &pv->pts_heap, pts );
 666
 667         ++pv->nframes;
 668     }
 669
 670     return got_picture;
 671 }
 672
 673 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
 674                          int64_t pts, int64_t dts )
 675 {
 676     /*
 677      * The following loop is a do..while because we need to handle both
 678      * data & the flush at the end (signaled by size=0). At the end there's
 679      * generally a frame in the parser & one or more frames in the decoder
 680      * (depending on the bframes setting).
 681      */
 682     int pos = 0;
 683     do {
 684         uint8_t *pout;
 685         int pout_len;
 686         int len = av_parser_parse( pv->parser, pv->context, &pout, &pout_len,
 687                                    data + pos, size - pos, pts, dts );
 688         pos += len;
 689
 690         if ( pout_len > 0 )
 691         {
 692             pv->pts = pv->parser->pts;
 693             decodeFrame( pv, pout, pout_len );
 694         }
 695     } while ( pos < size );
 696
 697     /* the stuff above flushed the parser, now flush the decoder */
 698     if ( size <= 0 )
 699     {
 700         while ( decodeFrame( pv, NULL, 0 ) )
 701         {
 702         }
 703         flushDelayQueue( pv );
 704     }
 705 }
 706
 707 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 708 {
 709     hb_buffer_t *head = hb_list_item( pv->list, 0 );
 710
 711     if ( head )
 712     {
 713         hb_list_rem( pv->list, head );
 714
 715         hb_buffer_t *last = head, *buf;
 716
 717         while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
 718         {
 719             hb_list_rem( pv->list, buf );
 720             last->next = buf;
 721             last = buf;
 722         }
 723     }
 724     return head;
 725 }
 726
 727
 728 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
 729 {
 730
 731     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
 732     w->private_data = pv;
 733     pv->job   = job;
 734     pv->list = hb_list_init();
 735
 736     int codec_id = w->codec_param;
 737     pv->parser = av_parser_init( codec_id );
 738     pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
 739
 740     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
 741     pv->context->opaque = pv;
 742     pv->context->get_buffer = get_frame_buf;
 743
 744     return 0;
 745 }
 746
 747 static int next_hdr( hb_buffer_t *in, int offset )
 748 {
 749     uint8_t *dat = in->data;
 750     uint16_t last2 = 0xffff;
 751     for ( ; in->size - offset > 1; ++offset )
 752     {
 753         if ( last2 == 0 && dat[offset] == 0x01 )
 754             // found an mpeg start code
 755             return offset - 2;
 756
 757         last2 = ( last2 << 8 ) | dat[offset];
 758     }
 759
 760     return -1;
 761 }
 762
 763 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
 764 {
 765     if ( in->size - offset < 4 )
 766         // not enough room for an mpeg start code
 767         return -1;
 768
 769     for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
 770     {
 771         if ( in->data[offset+3] == hdr_type )
 772             // found it
 773             break;
 774     }
 775     return offset;
 776 }
 777
 778 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
 779 {
 780     hb_work_private_t *pv = w->private_data;
 781
 782     // we can't call the avstream funcs but the read_header func in the
 783     // AVInputFormat may set up some state in the AVContext. In particular
 784     // vc1t_read_header allocates 'extradata' to deal with header issues
 785     // related to Microsoft's bizarre engineering notions. We alloc a chunk
 786     // of space to make vc1 work then associate the codec with the context.
 787     if ( w->codec_param != CODEC_ID_VC1 )
 788     {
 789         // we haven't been inflicted with M$ - allocate a little space as
 790         // a marker and return success.
 791         pv->context->extradata_size = 16;
 792         pv->context->extradata = av_malloc(pv->context->extradata_size);
 793         return 0;
 794     }
 795
 796     // find the start and and of the sequence header
 797     int shdr, shdr_end;
 798     if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
 799     {
 800         // didn't find start of seq hdr
 801         return 1;
 802     }
 803     if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
 804     {
 805         shdr_end = in->size;
 806     }
 807     shdr_end -= shdr;
 808
 809     // find the start and and of the entry point header
 810     int ehdr, ehdr_end;
 811     if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
 812     {
 813         // didn't find start of entry point hdr
 814         return 1;
 815     }
 816     if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
 817     {
 818         ehdr_end = in->size;
 819     }
 820     ehdr_end -= ehdr;
 821
 822     // found both headers - allocate an extradata big enough to hold both
 823     // then copy them into it.
 824     pv->context->extradata_size = shdr_end + ehdr_end;
 825     pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
 826     memcpy( pv->context->extradata, in->data + shdr, shdr_end );
 827     memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
 828     memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
 829     return 0;
 830 }
 831
 832 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 833                             hb_buffer_t ** buf_out )
 834 {
 835     hb_work_private_t *pv = w->private_data;
 836     hb_buffer_t *in = *buf_in;
 837     int64_t pts = AV_NOPTS_VALUE;
 838     int64_t dts = pts;
 839
 840     *buf_in = NULL;
 841
 842     /* if we got an empty buffer signaling end-of-stream send it downstream */
 843     if ( in->size == 0 )
 844     {
 845         decodeVideo( pv, in->data, in->size, pts, dts );
 846         hb_list_add( pv->list, in );
 847         *buf_out = link_buf_list( pv );
 848         return HB_WORK_DONE;
 849     }
 850
 851     // if this is the first frame open the codec (we have to wait for the
 852     // first frame because of M$ VC1 braindamage).
 853     if ( pv->context->extradata_size == 0 )
 854     {
 855         if ( setup_extradata( w, in ) )
 856         {
 857             // we didn't find the headers needed to set up extradata.
 858             // the codec will abort if we open it so just free the buf
 859             // and hope we eventually get the info we need.
 860             hb_buffer_close( &in );
 861             return HB_WORK_OK;
 862         }
 863         AVCodec *codec = avcodec_find_decoder( w->codec_param );
 864         // There's a mis-feature in ffmpeg that causes the context to be
 865         // incorrectly initialized the 1st time avcodec_open is called.
 866         // If you close it and open a 2nd time, it finishes the job.
 867         hb_avcodec_open( pv->context, codec );
 868         hb_avcodec_close( pv->context );
 869         hb_avcodec_open( pv->context, codec );
 870     }
 871
 872     if( in->start >= 0 )
 873     {
 874         pts = in->start;
 875         dts = in->renderOffset;
 876     }
 877     if ( in->new_chap )
 878     {
 879         pv->new_chap = in->new_chap;
 880         pv->chap_time = pts >= 0? pts : pv->pts_next;
 881     }
 882     decodeVideo( pv, in->data, in->size, pts, dts );
 883     hb_buffer_close( &in );
 884     *buf_out = link_buf_list( pv );
 885     return HB_WORK_OK;
 886 }
 887
 888 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
 889 {
 890     hb_work_private_t *pv = w->private_data;
 891
 892     memset( info, 0, sizeof(*info) );
 893
 894     if ( pv && pv->context )
 895     {
 896         AVCodecContext *context = pv->context;
 897         info->bitrate = context->bit_rate;
 898         info->width = context->width;
 899         info->height = context->height;
 900
 901         /* ffmpeg gives the frame rate in frames per second while HB wants
 902          * it in units of the 27MHz MPEG clock. */
 903         info->rate = 27000000;
 904         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
 905                           context->time_base.den;
 906         if ( context->ticks_per_frame > 1 )
 907         {
 908             // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
 909             // field rate rather than frame rate so convert back to frames.
 910             info->rate_base *= context->ticks_per_frame;
 911         }
 912
 913         /* Sometimes there's no pixel aspect set in the source. In that case,
 914            assume a 1:1 PAR. Otherwise, preserve the source PAR.             */
 915         info->pixel_aspect_width = context->sample_aspect_ratio.num ?
 916                                         context->sample_aspect_ratio.num : 1;
 917         info->pixel_aspect_height = context->sample_aspect_ratio.den ?
 918                                         context->sample_aspect_ratio.den : 1;
 919
 920         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
 921          * Display Aspect Ratio so we convert by scaling by the Storage
 922          * Aspect Ratio (w/h). We do the calc in floating point to get the
 923          * rounding right. */
 924         info->aspect = (double)info->pixel_aspect_width *
 925                        (double)context->width /
 926                        (double)info->pixel_aspect_height /
 927                        (double)context->height;
 928
 929         info->profile = context->profile;
 930         info->level = context->level;
 931         info->name = context->codec->name;
 932         return 1;
 933     }
 934     return 0;
 935 }
 936
 937 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 938                              hb_work_info_t *info )
 939 {
 940     return 0;
 941 }
 942
 943 hb_work_object_t hb_decavcodecv =
 944 {
 945     WORK_DECAVCODECV,
 946     "Video decoder (libavcodec)",
 947     decavcodecvInit,
 948     decavcodecvWork,
 949     decavcodecClose,
 950     decavcodecvInfo,
 951     decavcodecvBSInfo
 952 };
 953
 954
 955 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
 956 // includes a parser and passes information from the parser to the decoder
 957 // via a codec context kept in the AVStream of the reader's AVFormatContext.
 958 // We *have* to use that codec context to decode the stream or we'll get
 959 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
 960 // codec context in our codec_param.
 961
// This routine gets the appropriate context pointer from the ffmpeg
// stream reader. It can't be called until we get the first buffer because
// we can't guarantee that the reader will be called before our init
// routine, and if our init is called first we'll get a pointer to the
// old scan stream (which has already been closed).
 967 static void init_ffmpeg_context( hb_work_object_t *w )
 968 {
 969     hb_work_private_t *pv = w->private_data;
 970     pv->context = hb_ffmpeg_context( w->codec_param );
 971
 972     // during scan the decoder gets closed & reopened which will
 973     // close the codec so reopen it if it's not there
 974     if ( ! pv->context->codec )
 975     {
 976         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
 977         hb_avcodec_open( pv->context, codec );
 978     }
 979     // set up our best guess at the frame duration.
 980     // the frame rate in the codec is usually bogus but it's sometimes
 981     // ok in the stream.
 982     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 983
 984     if ( st->nb_frames && st->duration )
 985     {
 986         // compute the average frame duration from the total number
 987         // of frames & the total duration.
 988         pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
 989                        ( (double)st->nb_frames * (double)st->time_base.den );
 990     }
 991     else
 992     {
 993         // XXX We don't have a frame count or duration so try to use the
 994         // far less reliable time base info in the stream.
 995         // Because the time bases are so screwed up, we only take values
 996         // in the range 8fps - 64fps.
 997         AVRational tb;
 998         if ( st->time_base.num * 64 > st->time_base.den &&
 999              st->time_base.den > st->time_base.num * 8 )
1000         {
1001             tb = st->time_base;
1002         }
1003         else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
1004                   st->r_frame_rate.num > st->r_frame_rate.den * 8 )
1005         {
1006             tb.num = st->r_frame_rate.den;
1007             tb.den = st->r_frame_rate.num;
1008         }
1009         else
1010         {
1011             tb.num = 1001;  /*XXX*/
1012             tb.den = 24000; /*XXX*/
1013         }
1014         pv->duration =  (double)tb.num / (double)tb.den;
1015     }
1016     pv->duration *= 90000.;
1017
1018     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
1019     pv->context->opaque = pv;
1020     pv->context->get_buffer = get_frame_buf;
1021
1022     // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1023     // b-frames abortion that messes up frame ordering and timestamps.
1024     // XXX ffmpeg knows which streams are broken but doesn't expose the
1025     //     info externally. We should patch ffmpeg to add a flag to the
1026     //     codec context for this but until then we mark all ffmpeg streams
1027     //     as suspicious.
1028     pv->brokenByMicrosoft = 1;
1029 }
1030
1031 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1032 {
1033     // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1034     // will seg fault in odd, data dependent ways if it's not there. (my guess
1035     // is this is a case of a local performance optimization creating a global
1036     // performance degradation since all the time wasted by extraneous data
1037     // copies & memory zeroing has to be huge compared to the minor reduction
1038     // in inner-loop instructions this affords - modern cpus bottleneck on
1039     // memory bandwidth not instruction bandwidth).
1040     if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1041     {
1042         // have to realloc to add the padding
1043         hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
1044     }
1045     memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
1046 }
1047
1048 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
1049 {
1050
1051     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1052     w->private_data = pv;
1053     pv->job   = job;
1054     pv->list = hb_list_init();
1055     pv->pts_next = -1;
1056     pv->pts = -1;
1057     return 0;
1058 }
1059
1060 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1061                              hb_buffer_t ** buf_out )
1062 {
1063     hb_work_private_t *pv = w->private_data;
1064     if ( ! pv->context )
1065     {
1066         init_ffmpeg_context( w );
1067     }
1068     hb_buffer_t *in = *buf_in;
1069     *buf_in = NULL;
1070
1071     /* if we got an empty buffer signaling end-of-stream send it downstream */
1072     if ( in->size == 0 )
1073     {
1074         /* flush any frames left in the decoder */
1075         while ( decodeFrame( pv, NULL, 0 ) )
1076         {
1077         }
1078         flushDelayQueue( pv );
1079         hb_list_add( pv->list, in );
1080         *buf_out = link_buf_list( pv );
1081         return HB_WORK_DONE;
1082     }
1083
1084     int64_t pts = in->start;
1085     if( pts >= 0 )
1086     {
1087         // use the first timestamp as our 'next expected' pts
1088         if ( pv->pts_next < 0 )
1089         {
1090             pv->pts_next = pts;
1091         }
1092         pv->pts = pts;
1093     }
1094
1095     if ( in->new_chap )
1096     {
1097         pv->new_chap = in->new_chap;
1098         pv->chap_time = pts >= 0? pts : pv->pts_next;
1099     }
1100     prepare_ffmpeg_buffer( in );
1101     decodeFrame( pv, in->data, in->size );
1102     hb_buffer_close( &in );
1103     *buf_out = link_buf_list( pv );
1104     return HB_WORK_OK;
1105 }
1106
1107 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1108 {
1109     if ( decavcodecvInfo( w, info ) )
1110     {
1111         hb_work_private_t *pv = w->private_data;
1112         if ( ! pv->context )
1113         {
1114             init_ffmpeg_context( w );
1115         }
1116         // we have the frame duration in units of the 90KHz pts clock but
1117         // need it in units of the 27MHz MPEG clock. */
1118         info->rate = 27000000;
1119         info->rate_base = pv->duration * 300.;
1120         return 1;
1121     }
1122     return 0;
1123 }
1124
1125 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
1126 {
1127     AVCodecContext *context = pv->context;
1128     int pos = 0;
1129
1130     while ( pos < size )
1131     {
1132         int16_t *buffer = pv->buffer;
1133         if ( buffer == NULL )
1134         {
1135             // XXX ffmpeg bug workaround
1136             // malloc a buffer for the audio decode. On an x86, ffmpeg
1137             // uses mmx/sse instructions on this buffer without checking
1138             // that it's 16 byte aligned and this will cause an abort if
1139             // the buffer is allocated on our stack. Rather than doing
1140             // complicated, machine dependent alignment here we use the
1141             // fact that malloc returns an aligned pointer on most architectures.
1142
1143             #if defined( SYS_CYGWIN )
1144                 // Cygwin's malloc doesn't appear to return 16-byte aligned memory so use memalign instead.
1145                pv->buffer = memalign(16, AVCODEC_MAX_AUDIO_FRAME_SIZE);
1146             #else
1147                 pv->buffer = malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1148             #endif
1149
1150             buffer = pv->buffer;
1151         }
1152         int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1153         int len = avcodec_decode_audio2( context, buffer, &out_size,
1154                                          data + pos, size - pos );
1155         if ( len <= 0 )
1156         {
1157             return;
1158         }
1159         pos += len;
1160         if( out_size > 0 )
1161         {
1162             // We require signed 16-bit ints for the output format. If
1163             // we got something different convert it.
1164             if ( context->sample_fmt != SAMPLE_FMT_S16 )
1165             {
1166                 // Note: av_audio_convert seems to be a work-in-progress but
1167                 //       looks like it will eventually handle general audio
1168                 //       mixdowns which would allow us much more flexibility
1169                 //       in handling multichannel audio in HB. If we were doing
1170                 //       anything more complicated than a one-for-one format
1171                 //       conversion we'd probably want to cache the converter
1172                 //       context in the pv.
1173                 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1174                 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1175                                                               context->sample_fmt, 1,
1176                                                               NULL, 0 );
1177                 // get output buffer size (in 2-byte samples) then malloc a buffer
1178                 out_size = ( out_size * 2 ) / isamp;
1179                 buffer = malloc( out_size );
1180
1181                 // we're doing straight sample format conversion which behaves as if
1182                 // there were only one channel.
1183                 const void * const ibuf[6] = { pv->buffer };
1184                 void * const obuf[6] = { buffer };
1185                 const int istride[6] = { isamp };
1186                 const int ostride[6] = { 2 };
1187
1188                 av_audio_convert( ctx, obuf, ostride, ibuf, istride, out_size >> 1 );
1189                 av_audio_convert_free( ctx );
1190             }
1191             hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
1192
1193             // convert from bytes to total samples
1194             out_size >>= 1;
1195
1196             double pts = pv->pts_next;
1197             buf->start = pts;
1198             pts += out_size * pv->duration;
1199             buf->stop  = pts;
1200             pv->pts_next = pts;
1201
1202             float *fl32 = (float *)buf->data;
1203             int i;
1204             for( i = 0; i < out_size; ++i )
1205             {
1206                 fl32[i] = buffer[i];
1207             }
1208             hb_list_add( pv->list, buf );
1209
1210             // if we allocated a buffer for sample format conversion, free it
1211             if ( buffer != pv->buffer )
1212             {
1213                 free( buffer );
1214             }
1215         }
1216     }
1217 }
1218
1219 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1220                     hb_buffer_t **buf_out )
1221 {
1222     if ( (*buf_in)->size <= 0 )
1223     {
1224         /* EOF on input stream - send it downstream & say that we're done */
1225         *buf_out = *buf_in;
1226         *buf_in = NULL;
1227         return HB_WORK_DONE;
1228     }
1229
1230     hb_work_private_t *pv = w->private_data;
1231
1232     if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1233     {
1234         // discard buffers that start before video time 0
1235         *buf_out = NULL;
1236         return HB_WORK_OK;
1237     }
1238
1239     if ( ! pv->context )
1240     {
1241         init_ffmpeg_context( w );
1242         // duration is a scaling factor to go from #bytes in the decoded
1243         // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1244         // total samples to per-channel samples. 'sample_rate' converts
1245         // per-channel samples to seconds per sample and the 90000
1246         // is mpeg ticks per second.
1247         pv->duration = 90000. /
1248                     (double)( pv->context->sample_rate * pv->context->channels );
1249     }
1250     hb_buffer_t *in = *buf_in;
1251
1252     // if the packet has a timestamp use it if we don't have a timestamp yet
1253     // or if there's been a timing discontinuity of more than 100ms.
1254     if ( in->start >= 0 &&
1255          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1256     {
1257         pv->pts_next = in->start;
1258     }
1259     prepare_ffmpeg_buffer( in );
1260     decodeAudio( pv, in->data, in->size );
1261     *buf_out = link_buf_list( pv );
1262
1263     return HB_WORK_OK;
1264 }
1265
1266 hb_work_object_t hb_decavcodecvi =
1267 {
1268     WORK_DECAVCODECVI,
1269     "Video decoder (ffmpeg streams)",
1270     decavcodecviInit,
1271     decavcodecviWork,
1272     decavcodecClose,
1273     decavcodecviInfo,
1274     decavcodecvBSInfo
1275 };
1276
1277 hb_work_object_t hb_decavcodecai =
1278 {
1279     WORK_DECAVCODECAI,
1280     "Audio decoder (ffmpeg streams)",
1281     decavcodecviInit,
1282     decavcodecaiWork,
1283     decavcodecClose,
1284     decavcodecInfo,
1285     decavcodecBSInfo
1286 };