libhb/decavcodec.c

   1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 /* This module is Handbrake's interface to the ffmpeg decoder library
   8    (libavcodec & small parts of libavformat). It contains four Handbrake
   9    "work objects":
  10
  11     decavcodec  connects HB to an ffmpeg audio decoder
  12     decavcodecv connects HB to an ffmpeg video decoder
  13
  14         (Two different routines are needed because the ffmpeg library
  15         has different decoder calling conventions for audio & video.
  16         The audio decoder should have had its name changed to "decavcodeca"
  17         but I got lazy.) These work objects are self-contained & follow all
  18         of HB's conventions for a decoder module. They can be used like
  19         any other HB decoder (deca52, decmpeg2, etc.).
  20
  21     decavcodecai "internal" (incestuous?) version of decavcodec
  22     decavcodecvi "internal" (incestuous?) version of decavcodecv
  23
  24         These routines are functionally equivalent to the routines above but
  25         can only be used by the ffmpeg-based stream reader in libhb/stream.c.
  26         The reason they exist is because the ffmpeg library leaves some of
  27         the information needed by the decoder in the AVStream (the data
  28         structure used by the stream reader) and we need to retrieve it
  29         to successfully decode frames. But in HB the reader and decoder
  30         modules are in completely separate threads and nothing goes between
  31         them but hb_buffers containing frames to be decoded. I.e., there's
  32         no easy way for the ffmpeg stream reader to pass a pointer to its
  33         AVStream over to the ffmpeg video or audio decoder. So the *i work
  34         objects use a private back door to the stream reader to get access
  35         to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
  36         and the codec_param passed to these work objects is the key to this
  37         back door (it's basically an index that allows the correct AVStream
  38         to be retrieved).
  39
  40     The normal & *i objects share a lot of code (the basic frame decoding
  41     and bitstream info code is factored out into subroutines that can be
  42     called by either) but the top level routines of the *i objects
  43     (decavcodecviWork, decavcodecviInfo, etc.) are different because:
  44      1) they *have* to use the AVCodecContext that's contained in the
  45         reader's AVStream rather than just allocating & using their own,
  46      2) the Info routines have access to stuff kept in the AVStream in addition
  47         to stuff kept in the AVCodecContext. This shouldn't be necessary but
  48         crucial information like video frame rate that should be in the
  49         AVCodecContext is either missing or wrong in the version of ffmpeg
  50         we're currently using.
  51
  52     A consequence of the above is that the non-i work objects *can't* use
  53     information from the AVStream because there isn't one - they get their
  54     data from either the dvd reader or the mpeg reader, not the ffmpeg stream
  55     reader. That means that they have to make up for deficiencies in the
  56     AVCodecContext info by using stuff kept in the HB "title" struct. It
  57     also means that ffmpeg codecs that randomly scatter state needed by
  58     the decoder across both the AVCodecContext & the AVStream (e.g., the
  59     VC1 decoder) can't easily be used by the HB mpeg stream reader.
  60  */
  61
  62 #include "hb.h"
  63 #include "hbffmpeg.h"
  64 #include "libavcodec/audioconvert.h"
  65
  66 static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
  67 static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  68 static void decavcodecClose( hb_work_object_t * );
  69 static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
  70 static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );
  71
  72 hb_work_object_t hb_decavcodec =
  73 {
  74     WORK_DECAVCODEC,
  75     "MPGA decoder (libavcodec)",
  76     decavcodecInit,
  77     decavcodecWork,
  78     decavcodecClose,
  79     decavcodecInfo,
  80     decavcodecBSInfo
  81 };
  82
  83 #define HEAP_SIZE 8
  84 typedef struct {
  85     // there are nheap items on the heap indexed 1..nheap (i.e., top of
  86     // heap is 1). The 0th slot is unused - a marker is put there to check
  87     // for overwrite errs.
  88     int64_t h[HEAP_SIZE+1];
  89     int     nheap;
  90 } pts_heap_t;
  91
  92 struct hb_work_private_s
  93 {
  94     hb_job_t        *job;
  95     AVCodecContext  *context;
  96     AVCodecParserContext *parser;
  97     hb_list_t       *list;
  98     double          duration;   // frame duration (for video)
  99     double          pts_next;   // next pts we expect to generate
 100     int64_t         pts;        // (video) pts passing from parser to decoder
 101     int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
 102     int             new_chap;   // output chapter mark pending
 103     uint32_t        nframes;
 104     uint32_t        ndrops;
 105     uint32_t        decode_errors;
 106     int             brokenByMicrosoft; // video stream may contain packed b-frames
 107     hb_buffer_t*    delayq[HEAP_SIZE];
 108     pts_heap_t      pts_heap;
 109     void*           buffer;
 110     struct SwsContext *sws_context; // if we have to rescale or convert color space
 111 };
 112
 113 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size );
 114 static hb_buffer_t *link_buf_list( hb_work_private_t *pv );
 115
 116
 117 static int64_t heap_pop( pts_heap_t *heap )
 118 {
 119     int64_t result;
 120
 121     if ( heap->nheap <= 0 )
 122     {
 123         return -1;
 124     }
 125
 126     // return the top of the heap then put the bottom element on top,
 127     // decrease the heap size by one & rebalence the heap.
 128     result = heap->h[1];
 129
 130     int64_t v = heap->h[heap->nheap--];
 131     int parent = 1;
 132     int child = parent << 1;
 133     while ( child <= heap->nheap )
 134     {
 135         // find the smallest of the two children of parent
 136         if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
 137             ++child;
 138
 139         if (v <= heap->h[child])
 140             // new item is smaller than either child so it's the new parent.
 141             break;
 142
 143         // smallest child is smaller than new item so move it up then
 144         // check its children.
 145         int64_t hp = heap->h[child];
 146         heap->h[parent] = hp;
 147         parent = child;
 148         child = parent << 1;
 149     }
 150     heap->h[parent] = v;
 151     return result;
 152 }
 153
 154 static void heap_push( pts_heap_t *heap, int64_t v )
 155 {
 156     if ( heap->nheap < HEAP_SIZE )
 157     {
 158         ++heap->nheap;
 159     }
 160
 161     // stick the new value on the bottom of the heap then bubble it
 162     // up to its correct spot.
 163         int child = heap->nheap;
 164         while (child > 1) {
 165                 int parent = child >> 1;
 166                 if (heap->h[parent] <= v)
 167                         break;
 168                 // move parent down
 169                 int64_t hp = heap->h[parent];
 170                 heap->h[child] = hp;
 171                 child = parent;
 172         }
 173         heap->h[child] = v;
 174 }
 175
 176
 177 /***********************************************************************
 178  * hb_work_decavcodec_init
 179  ***********************************************************************
 180  *
 181  **********************************************************************/
 182 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 183 {
 184     AVCodec * codec;
 185
 186     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 187     w->private_data = pv;
 188
 189     pv->job   = job;
 190     pv->list  = hb_list_init();
 191
 192     int codec_id = w->codec_param;
 193     /*XXX*/
 194     if ( codec_id == 0 )
 195         codec_id = CODEC_ID_MP2;
 196
 197     codec = avcodec_find_decoder( codec_id );
 198     pv->parser = av_parser_init( codec_id );
 199
 200     pv->context = avcodec_alloc_context();
 201     hb_avcodec_open( pv->context, codec );
 202
 203     return 0;
 204 }
 205
 206 /***********************************************************************
 207  * Close
 208  ***********************************************************************
 209  *
 210  **********************************************************************/
 211 static void decavcodecClose( hb_work_object_t * w )
 212 {
 213     hb_work_private_t * pv = w->private_data;
 214
 215     if ( pv )
 216     {
 217         if ( pv->job && pv->context && pv->context->codec )
 218         {
 219             hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
 220                     pv->context->codec->name, pv->nframes, pv->decode_errors,
 221                     pv->ndrops );
 222         }
 223         if ( pv->sws_context )
 224         {
 225             sws_freeContext( pv->sws_context );
 226         }
 227         if ( pv->parser )
 228         {
 229             av_parser_close(pv->parser);
 230         }
 231         if ( pv->context && pv->context->codec )
 232         {
 233             hb_avcodec_close( pv->context );
 234         }
 235         if ( pv->list )
 236         {
 237             hb_list_close( &pv->list );
 238         }
 239         if ( pv->buffer )
 240         {
 241             av_free( pv->buffer );
 242             pv->buffer = NULL;
 243         }
 244         free( pv );
 245         w->private_data = NULL;
 246     }
 247 }
 248
 249 /***********************************************************************
 250  * Work
 251  ***********************************************************************
 252  *
 253  **********************************************************************/
 254 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 255                     hb_buffer_t ** buf_out )
 256 {
 257     hb_work_private_t * pv = w->private_data;
 258     hb_buffer_t * in = *buf_in;
 259
 260     if ( in->size <= 0 )
 261     {
 262         /* EOF on input stream - send it downstream & say that we're done */
 263         *buf_out = in;
 264         *buf_in = NULL;
 265         return HB_WORK_DONE;
 266     }
 267
 268     *buf_out = NULL;
 269
 270     if ( in->start < -1 && pv->pts_next <= 0 )
 271     {
 272         // discard buffers that start before video time 0
 273         return HB_WORK_OK;
 274     }
 275
 276     // if the packet has a timestamp use it
 277     if ( in->start != -1 )
 278     {
 279         pv->pts_next = in->start;
 280     }
 281
 282     int pos, len;
 283     for ( pos = 0; pos < in->size; pos += len )
 284     {
 285         uint8_t *parser_output_buffer;
 286         int parser_output_buffer_len;
 287         int64_t cur = pv->pts_next;
 288
 289         if ( pv->parser != NULL )
 290         {
 291             len = av_parser_parse2( pv->parser, pv->context,
 292                     &parser_output_buffer, &parser_output_buffer_len,
 293                     in->data + pos, in->size - pos, cur, cur, AV_NOPTS_VALUE );
 294         }
 295         else
 296         {
 297             parser_output_buffer = in->data;
 298             len = parser_output_buffer_len = in->size;
 299         }
 300         if (parser_output_buffer_len)
 301         {
 302             // set the duration on every frame since the stream format can
 303             // change (it shouldn't but there's no way to guarantee it).
 304             // duration is a scaling factor to go from #bytes in the decoded
 305             // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
 306             // total samples to per-channel samples. 'sample_rate' converts
 307             // per-channel samples to seconds per sample and the 90000
 308             // is mpeg ticks per second.
 309             if ( pv->context->sample_rate && pv->context->channels )
 310             {
 311                 pv->duration = 90000. /
 312                             (double)( pv->context->sample_rate * pv->context->channels );
 313             }
 314             decodeAudio( pv, parser_output_buffer, parser_output_buffer_len );
 315         }
 316     }
 317     *buf_out = link_buf_list( pv );
 318     return HB_WORK_OK;
 319 }
 320
 321 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
 322 {
 323     hb_work_private_t *pv = w->private_data;
 324
 325     memset( info, 0, sizeof(*info) );
 326
 327     if ( pv && pv->context )
 328     {
 329         AVCodecContext *context = pv->context;
 330         info->bitrate = context->bit_rate;
 331         info->rate = context->time_base.num;
 332         info->rate_base = context->time_base.den;
 333         info->profile = context->profile;
 334         info->level = context->level;
 335         return 1;
 336     }
 337     return 0;
 338 }
 339
 340 static const int chan2layout[] = {
 341     HB_INPUT_CH_LAYOUT_MONO,  // We should allow no audio really.
 342     HB_INPUT_CH_LAYOUT_MONO,
 343     HB_INPUT_CH_LAYOUT_STEREO,
 344     HB_INPUT_CH_LAYOUT_2F1R,
 345     HB_INPUT_CH_LAYOUT_2F2R,
 346     HB_INPUT_CH_LAYOUT_3F2R,
 347     HB_INPUT_CH_LAYOUT_4F2R,
 348     HB_INPUT_CH_LAYOUT_STEREO,
 349     HB_INPUT_CH_LAYOUT_STEREO,
 350 };
 351
 352 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 353                              hb_work_info_t *info )
 354 {
 355     hb_work_private_t *pv = w->private_data;
 356     int ret = 0;
 357
 358     memset( info, 0, sizeof(*info) );
 359
 360     if ( pv && pv->context )
 361     {
 362         return decavcodecInfo( w, info );
 363     }
 364     // XXX
 365     // We should parse the bitstream to find its parameters but for right
 366     // now we just return dummy values if there's a codec that will handle it.
 367     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
 368                                                            CODEC_ID_MP2 );
 369     if ( ! codec )
 370     {
 371         // there's no ffmpeg codec for this audio type - give up
 372         return -1;
 373     }
 374
 375     static char codec_name[64];
 376     info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
 377
 378     AVCodecParserContext *parser = av_parser_init( codec->id );
 379     AVCodecContext *context = avcodec_alloc_context();
 380     hb_avcodec_open( context, codec );
 381     uint8_t *buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 382     int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 383     unsigned char *pbuffer;
 384     int pos = 0, pbuffer_size;
 385
 386     while ( pos < buf->size )
 387     {
 388         int len;
 389
 390         if (parser != NULL )
 391         {
 392             len = av_parser_parse2( parser, context, &pbuffer, &pbuffer_size,
 393                                     buf->data + pos, buf->size - pos,
 394                                     buf->start, buf->start, AV_NOPTS_VALUE );
 395         }
 396         else
 397         {
 398             pbuffer = buf->data;
 399             len = pbuffer_size = buf->size;
 400         }
 401         pos += len;
 402         if ( pbuffer_size > 0 )
 403         {
 404             AVPacket avp;
 405             av_init_packet( &avp );
 406             avp.data = pbuffer;
 407             avp.size = pbuffer_size;
 408
 409             len = avcodec_decode_audio3( context, (int16_t*)buffer, &out_size, &avp );
 410             if ( len > 0 && context->sample_rate > 0 )
 411             {
 412                 info->bitrate = context->bit_rate;
 413                 info->rate = context->sample_rate;
 414                 info->rate_base = 1;
 415                 info->channel_layout = chan2layout[context->channels & 7];
 416                 ret = 1;
 417                 break;
 418             }
 419         }
 420     }
 421     av_free( buffer );
 422     if ( parser != NULL )
 423         av_parser_close( parser );
 424     hb_avcodec_close( context );
 425     return ret;
 426 }
 427
 428 /* -------------------------------------------------------------
 429  * General purpose video decoder using libavcodec
 430  */
 431
 432 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
 433                             int h )
 434 {
 435     if ( dstride == sstride )
 436     {
 437         memcpy( dst, src, dstride * h );
 438         return dst + dstride * h;
 439     }
 440     int lbytes = dstride <= sstride? dstride : sstride;
 441     while ( --h >= 0 )
 442     {
 443         memcpy( dst, src, lbytes );
 444         src += sstride;
 445         dst += dstride;
 446     }
 447     return dst;
 448 }
 449
 450 // copy one video frame into an HB buf. If the frame isn't in our color space
 451 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
 452 // Otherwise just copy the bits.
 453 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 454 {
 455     AVCodecContext *context = pv->context;
 456     int w, h;
 457     if ( ! pv->job )
 458     {
 459         // if the dimensions are odd, drop the lsb since h264 requires that
 460         // both width and height be even.
 461         w = ( context->width >> 1 ) << 1;
 462         h = ( context->height >> 1 ) << 1;
 463     }
 464     else
 465     {
 466         w =  pv->job->title->width;
 467         h =  pv->job->title->height;
 468     }
 469     hb_buffer_t *buf = hb_video_buffer_init( w, h );
 470     uint8_t *dst = buf->data;
 471
 472     if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
 473          h != context->height )
 474     {
 475         // have to convert to our internal color space and/or rescale
 476         AVPicture dstpic;
 477         avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 478
 479         if ( ! pv->sws_context )
 480         {
 481             pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
 482                                               w, h, PIX_FMT_YUV420P,
 483                                               SWS_LANCZOS|SWS_ACCURATE_RND,
 484                                               NULL, NULL, NULL );
 485         }
 486         sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
 487                    dstpic.data, dstpic.linesize );
 488     }
 489     else
 490     {
 491         dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
 492         w = (w + 1) >> 1; h = (h + 1) >> 1;
 493         dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
 494         dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
 495     }
 496     return buf;
 497 }
 498
 499 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
 500 {
 501     hb_work_private_t *pv = context->opaque;
 502     frame->pts = pv->pts;
 503     pv->pts = -1;
 504     return avcodec_default_get_buffer( context, frame );
 505 }
 506
 507 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 508 {
 509     hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
 510     if ( c && c->title )
 511     {
 512         hb_log( "%s: \"%s\" (%d) at frame %u time %"PRId64,
 513                 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
 514     }
 515     else
 516     {
 517         hb_log( "%s: Chapter %d at frame %u time %"PRId64,
 518                 pv->context->codec->name, chap_num, pv->nframes, pts );
 519     }
 520 }
 521
 522 static void flushDelayQueue( hb_work_private_t *pv )
 523 {
 524     hb_buffer_t *buf;
 525     int slot = pv->nframes & (HEAP_SIZE-1);
 526
 527     // flush all the video packets left on our timestamp-reordering delay q
 528     while ( ( buf = pv->delayq[slot] ) != NULL )
 529     {
 530         buf->start = heap_pop( &pv->pts_heap );
 531         hb_list_add( pv->list, buf );
 532         pv->delayq[slot] = NULL;
 533         slot = ( slot + 1 ) & (HEAP_SIZE-1);
 534     }
 535 }
 536
 537 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
 538 {
 539     int got_picture, oldlevel = 0;
 540     AVFrame frame;
 541     AVPacket avp;
 542
 543     if ( global_verbosity_level <= 1 )
 544     {
 545         oldlevel = av_log_get_level();
 546         av_log_set_level( AV_LOG_QUIET );
 547     }
 548
 549     av_init_packet( &avp );
 550     avp.data = data;
 551     avp.size = size;
 552     if ( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 )
 553     {
 554         ++pv->decode_errors;
 555     }
 556     if ( global_verbosity_level <= 1 )
 557     {
 558         av_log_set_level( oldlevel );
 559     }
 560     if( got_picture )
 561     {
 562         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
 563         // packet had a pts we handed it to av_parser_parse (if the packet had
 564         // no pts we set it to -1 but before the parse we can't distinguish between
 565         // the start of a video frame with no pts & an intermediate packet of
 566         // some frame which never has a pts). we hope that when parse returns
 567         // the frame to us the pts we originally handed it will be in parser->pts.
 568         // we put this pts into pv->pts so that when a avcodec_decode_video
 569         // finally gets around to allocating an AVFrame to hold the decoded
 570         // frame we can stuff that pts into the frame. if all of these relays
 571         // worked at this point frame.pts should hold the frame's pts from the
 572         // original data stream or -1 if it didn't have one. in the latter case
 573         // we generate the next pts in sequence for it.
 574         double frame_dur = pv->duration;
 575         if ( frame_dur <= 0 )
 576         {
 577             frame_dur = 90000. * (double)pv->context->time_base.num /
 578                         (double)pv->context->time_base.den;
 579             pv->duration = frame_dur;
 580         }
 581         if ( frame.repeat_pict )
 582         {
 583             frame_dur += frame.repeat_pict * frame_dur * 0.5;
 584         }
 585         // XXX Unlike every other video decoder, the Raw decoder doesn't
 586         //     use the standard buffer allocation routines so we never
 587         //     get to put a PTS in the frame. Do it now.
 588         if ( pv->context->codec_id == CODEC_ID_RAWVIDEO )
 589         {
 590             frame.pts = pv->pts;
 591             pv->pts = -1;
 592         }
 593         // If there was no pts for this frame, assume constant frame rate
 594         // video & estimate the next frame time from the last & duration.
 595         double pts = frame.pts;
 596         if ( pts < 0 )
 597         {
 598             pts = pv->pts_next;
 599         }
 600         pv->pts_next = pts + frame_dur;
 601
 602         hb_buffer_t *buf;
 603
 604         // if we're doing a scan or this content couldn't have been broken
 605         // by Microsoft we don't worry about timestamp reordering
 606         if ( ! pv->job || ! pv->brokenByMicrosoft )
 607         {
 608             buf = copy_frame( pv, &frame );
 609             buf->start = pts;
 610             hb_list_add( pv->list, buf );
 611             ++pv->nframes;
 612             return got_picture;
 613         }
 614
 615         // XXX This following probably addresses a libavcodec bug but I don't
 616         //     see an easy fix so we workaround it here.
 617         //
 618         // The M$ 'packed B-frames' atrocity results in decoded frames with
 619         // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
 620         // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
 621         // The frames are actually delivered in the right order but with
 622         // the wrong timestamp. To get the correct timestamp attached to
 623         // each frame we have a delay queue (longer than the max number of
 624         // b-frames) & a sorting heap for the timestamps. As each frame
 625         // comes out of the decoder the oldest frame in the queue is removed
 626         // and associated with the smallest timestamp. Then the new frame is
 627         // added to the queue & its timestamp is pushed on the heap.
 628         // This does nothing if the timestamps are correct (i.e., the video
 629         // uses a codec that Micro$oft hasn't broken yet) but the frames
 630         // get timestamped correctly even when M$ has munged them.
 631
 632         // remove the oldest picture from the frame queue (if any) &
 633         // give it the smallest timestamp from our heap. The queue size
 634         // is a power of two so we get the slot of the oldest by masking
 635         // the frame count & this will become the slot of the newest
 636         // once we've removed & processed the oldest.
 637         int slot = pv->nframes & (HEAP_SIZE-1);
 638         if ( ( buf = pv->delayq[slot] ) != NULL )
 639         {
 640             buf->start = heap_pop( &pv->pts_heap );
 641
 642             if ( pv->new_chap && buf->start >= pv->chap_time )
 643             {
 644                 buf->new_chap = pv->new_chap;
 645                 pv->new_chap = 0;
 646                 pv->chap_time = 0;
 647                 log_chapter( pv, buf->new_chap, buf->start );
 648             }
 649             else if ( pv->nframes == 0 )
 650             {
 651                 log_chapter( pv, pv->job->chapter_start, buf->start );
 652             }
 653             hb_list_add( pv->list, buf );
 654         }
 655
 656         // add the new frame to the delayq & push its timestamp on the heap
 657         pv->delayq[slot] = copy_frame( pv, &frame );
 658         heap_push( &pv->pts_heap, pts );
 659
 660         ++pv->nframes;
 661     }
 662
 663     return got_picture;
 664 }
 665
 666 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
 667                          int64_t pts, int64_t dts )
 668 {
 669     /*
 670      * The following loop is a do..while because we need to handle both
 671      * data & the flush at the end (signaled by size=0). At the end there's
 672      * generally a frame in the parser & one or more frames in the decoder
 673      * (depending on the bframes setting).
 674      */
 675     int pos = 0;
 676     do {
 677         uint8_t *pout;
 678         int pout_len;
 679         int len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len,
 680                                     data + pos, size - pos, pts, dts, AV_NOPTS_VALUE );
 681         pos += len;
 682
 683         if ( pout_len > 0 )
 684         {
 685             pv->pts = pv->parser->pts;
 686             decodeFrame( pv, pout, pout_len );
 687         }
 688     } while ( pos < size );
 689
 690     /* the stuff above flushed the parser, now flush the decoder */
 691     if ( size <= 0 )
 692     {
 693         while ( decodeFrame( pv, NULL, 0 ) )
 694         {
 695         }
 696         flushDelayQueue( pv );
 697     }
 698 }
 699
 700 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 701 {
 702     hb_buffer_t *head = hb_list_item( pv->list, 0 );
 703
 704     if ( head )
 705     {
 706         hb_list_rem( pv->list, head );
 707
 708         hb_buffer_t *last = head, *buf;
 709
 710         while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
 711         {
 712             hb_list_rem( pv->list, buf );
 713             last->next = buf;
 714             last = buf;
 715         }
 716     }
 717     return head;
 718 }
 719
 720
 721 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
 722 {
 723
 724     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
 725     w->private_data = pv;
 726     pv->job   = job;
 727     pv->list = hb_list_init();
 728
 729     int codec_id = w->codec_param;
 730     pv->parser = av_parser_init( codec_id );
 731     pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
 732
 733     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
 734     pv->context->opaque = pv;
 735     pv->context->get_buffer = get_frame_buf;
 736
 737     return 0;
 738 }
 739
 740 static int next_hdr( hb_buffer_t *in, int offset )
 741 {
 742     uint8_t *dat = in->data;
 743     uint16_t last2 = 0xffff;
 744     for ( ; in->size - offset > 1; ++offset )
 745     {
 746         if ( last2 == 0 && dat[offset] == 0x01 )
 747             // found an mpeg start code
 748             return offset - 2;
 749
 750         last2 = ( last2 << 8 ) | dat[offset];
 751     }
 752
 753     return -1;
 754 }
 755
 756 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
 757 {
 758     if ( in->size - offset < 4 )
 759         // not enough room for an mpeg start code
 760         return -1;
 761
 762     for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
 763     {
 764         if ( in->data[offset+3] == hdr_type )
 765             // found it
 766             break;
 767     }
 768     return offset;
 769 }
 770
 771 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
 772 {
 773     hb_work_private_t *pv = w->private_data;
 774
 775     // we can't call the avstream funcs but the read_header func in the
 776     // AVInputFormat may set up some state in the AVContext. In particular
 777     // vc1t_read_header allocates 'extradata' to deal with header issues
 778     // related to Microsoft's bizarre engineering notions. We alloc a chunk
 779     // of space to make vc1 work then associate the codec with the context.
 780     if ( w->codec_param != CODEC_ID_VC1 )
 781     {
 782         // we haven't been inflicted with M$ - allocate a little space as
 783         // a marker and return success.
 784         pv->context->extradata_size = 16;
 785         pv->context->extradata = av_malloc(pv->context->extradata_size);
 786         return 0;
 787     }
 788
 789     // find the start and and of the sequence header
 790     int shdr, shdr_end;
 791     if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
 792     {
 793         // didn't find start of seq hdr
 794         return 1;
 795     }
 796     if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
 797     {
 798         shdr_end = in->size;
 799     }
 800     shdr_end -= shdr;
 801
 802     // find the start and and of the entry point header
 803     int ehdr, ehdr_end;
 804     if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
 805     {
 806         // didn't find start of entry point hdr
 807         return 1;
 808     }
 809     if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
 810     {
 811         ehdr_end = in->size;
 812     }
 813     ehdr_end -= ehdr;
 814
 815     // found both headers - allocate an extradata big enough to hold both
 816     // then copy them into it.
 817     pv->context->extradata_size = shdr_end + ehdr_end;
 818     pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
 819     memcpy( pv->context->extradata, in->data + shdr, shdr_end );
 820     memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
 821     memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
 822     return 0;
 823 }
 824
 825 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 826                             hb_buffer_t ** buf_out )
 827 {
 828     hb_work_private_t *pv = w->private_data;
 829     hb_buffer_t *in = *buf_in;
 830     int64_t pts = AV_NOPTS_VALUE;
 831     int64_t dts = pts;
 832
 833     *buf_in = NULL;
 834
 835     /* if we got an empty buffer signaling end-of-stream send it downstream */
 836     if ( in->size == 0 )
 837     {
 838         decodeVideo( pv, in->data, in->size, pts, dts );
 839         hb_list_add( pv->list, in );
 840         *buf_out = link_buf_list( pv );
 841         return HB_WORK_DONE;
 842     }
 843
 844     // if this is the first frame open the codec (we have to wait for the
 845     // first frame because of M$ VC1 braindamage).
 846     if ( pv->context->extradata_size == 0 )
 847     {
 848         if ( setup_extradata( w, in ) )
 849         {
 850             // we didn't find the headers needed to set up extradata.
 851             // the codec will abort if we open it so just free the buf
 852             // and hope we eventually get the info we need.
 853             hb_buffer_close( &in );
 854             return HB_WORK_OK;
 855         }
 856         AVCodec *codec = avcodec_find_decoder( w->codec_param );
 857         // There's a mis-feature in ffmpeg that causes the context to be
 858         // incorrectly initialized the 1st time avcodec_open is called.
 859         // If you close it and open a 2nd time, it finishes the job.
 860         hb_avcodec_open( pv->context, codec );
 861         hb_avcodec_close( pv->context );
 862         hb_avcodec_open( pv->context, codec );
 863     }
 864
 865     if( in->start >= 0 )
 866     {
 867         pts = in->start;
 868         dts = in->renderOffset;
 869     }
 870     if ( in->new_chap )
 871     {
 872         pv->new_chap = in->new_chap;
 873         pv->chap_time = pts >= 0? pts : pv->pts_next;
 874     }
 875     decodeVideo( pv, in->data, in->size, pts, dts );
 876     hb_buffer_close( &in );
 877     *buf_out = link_buf_list( pv );
 878     return HB_WORK_OK;
 879 }
 880
 881 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
 882 {
 883     hb_work_private_t *pv = w->private_data;
 884
 885     memset( info, 0, sizeof(*info) );
 886
 887     if ( pv && pv->context )
 888     {
 889         AVCodecContext *context = pv->context;
 890         info->bitrate = context->bit_rate;
 891         info->width = context->width;
 892         info->height = context->height;
 893
 894         /* ffmpeg gives the frame rate in frames per second while HB wants
 895          * it in units of the 27MHz MPEG clock. */
 896         info->rate = 27000000;
 897         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
 898                           context->time_base.den;
 899         if ( context->ticks_per_frame > 1 )
 900         {
 901             // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
 902             // field rate rather than frame rate so convert back to frames.
 903             info->rate_base *= context->ticks_per_frame;
 904         }
 905
 906         info->pixel_aspect_width = context->sample_aspect_ratio.num;
 907         info->pixel_aspect_height = context->sample_aspect_ratio.den;
 908
 909         /* Sometimes there's no pixel aspect set in the source ffmpeg context
 910          * which appears to come from the video stream. In that case,
 911          * try the pixel aspect in AVStream (which appears to come from
 912          * the container). Else assume a 1:1 PAR. */
 913         if ( info->pixel_aspect_width == 0 ||
 914              info->pixel_aspect_height == 0 )
 915         {
 916             AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 917             info->pixel_aspect_width = st->sample_aspect_ratio.num ?
 918                                         st->sample_aspect_ratio.num : 1;
 919             info->pixel_aspect_height = st->sample_aspect_ratio.den ?
 920                                         st->sample_aspect_ratio.den : 1;
 921         }
 922         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
 923          * Display Aspect Ratio so we convert by scaling by the Storage
 924          * Aspect Ratio (w/h). We do the calc in floating point to get the
 925          * rounding right. */
 926         info->aspect = (double)info->pixel_aspect_width *
 927                        (double)context->width /
 928                        (double)info->pixel_aspect_height /
 929                        (double)context->height;
 930
 931         info->profile = context->profile;
 932         info->level = context->level;
 933         info->name = context->codec->name;
 934         return 1;
 935     }
 936     return 0;
 937 }
 938
 939 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 940                              hb_work_info_t *info )
 941 {
 942     return 0;
 943 }
 944
// Work object for the stand-alone libavcodec video decoder. The entries
// are positional: work object id, display name, then the init, work,
// close, info and bitstream-info routines.
hb_work_object_t hb_decavcodecv =
{
    WORK_DECAVCODECV,
    "Video decoder (libavcodec)",
    decavcodecvInit,
    decavcodecvWork,
    decavcodecClose,
    decavcodecvInfo,
    decavcodecvBSInfo
};
 955
 956
 957 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
 958 // includes a parser and passes information from the parser to the decoder
 959 // via a codec context kept in the AVStream of the reader's AVFormatContext.
 960 // We *have* to use that codec context to decode the stream or we'll get
 961 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
 962 // codec context in our codec_param.
 963
 964 // this routine gets the appropriate context pointer from the ffmpeg
 965 // stream reader. it can't be called until we get the first buffer because
 966 // we can't guarantee that reader will be called before the our init
 967 // routine and if our init is called first we'll get a pointer to the
 968 // old scan stream (which has already been closed).
 969 static void init_ffmpeg_context( hb_work_object_t *w )
 970 {
 971     hb_work_private_t *pv = w->private_data;
 972     pv->context = hb_ffmpeg_context( w->codec_param );
 973
 974     // during scan the decoder gets closed & reopened which will
 975     // close the codec so reopen it if it's not there
 976     if ( ! pv->context->codec )
 977     {
 978         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
 979         hb_avcodec_open( pv->context, codec );
 980     }
 981     // set up our best guess at the frame duration.
 982     // the frame rate in the codec is usually bogus but it's sometimes
 983     // ok in the stream.
 984     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 985
 986     if ( st->nb_frames && st->duration )
 987     {
 988         // compute the average frame duration from the total number
 989         // of frames & the total duration.
 990         pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
 991                        ( (double)st->nb_frames * (double)st->time_base.den );
 992     }
 993     else
 994     {
 995         // XXX We don't have a frame count or duration so try to use the
 996         // far less reliable time base info in the stream.
 997         // Because the time bases are so screwed up, we only take values
 998         // in the range 8fps - 64fps.
 999         AVRational tb;
1000         if ( st->time_base.num * 64 > st->time_base.den &&
1001              st->time_base.den > st->time_base.num * 8 )
1002         {
1003             tb = st->time_base;
1004         }
1005         else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
1006                   st->r_frame_rate.num > st->r_frame_rate.den * 8 )
1007         {
1008             tb.num = st->r_frame_rate.den;
1009             tb.den = st->r_frame_rate.num;
1010         }
1011         else
1012         {
1013             tb.num = 1001;  /*XXX*/
1014             tb.den = 24000; /*XXX*/
1015         }
1016         pv->duration =  (double)tb.num / (double)tb.den;
1017     }
1018     pv->duration *= 90000.;
1019
1020     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
1021     pv->context->opaque = pv;
1022     pv->context->get_buffer = get_frame_buf;
1023
1024     // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1025     // b-frames abortion that messes up frame ordering and timestamps.
1026     // XXX ffmpeg knows which streams are broken but doesn't expose the
1027     //     info externally. We should patch ffmpeg to add a flag to the
1028     //     codec context for this but until then we mark all ffmpeg streams
1029     //     as suspicious.
1030     pv->brokenByMicrosoft = 1;
1031 }
1032
1033 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1034 {
1035     // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1036     // will seg fault in odd, data dependent ways if it's not there. (my guess
1037     // is this is a case of a local performance optimization creating a global
1038     // performance degradation since all the time wasted by extraneous data
1039     // copies & memory zeroing has to be huge compared to the minor reduction
1040     // in inner-loop instructions this affords - modern cpus bottleneck on
1041     // memory bandwidth not instruction bandwidth).
1042     if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1043     {
1044         // have to realloc to add the padding
1045         hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
1046     }
1047     memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
1048 }
1049
1050 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
1051 {
1052
1053     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1054     w->private_data = pv;
1055     pv->job   = job;
1056     pv->list = hb_list_init();
1057     pv->pts_next = -1;
1058     pv->pts = -1;
1059     return 0;
1060 }
1061
1062 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1063                              hb_buffer_t ** buf_out )
1064 {
1065     hb_work_private_t *pv = w->private_data;
1066     hb_buffer_t *in = *buf_in;
1067     *buf_in = NULL;
1068
1069     /* if we got an empty buffer signaling end-of-stream send it downstream */
1070     if ( in->size == 0 )
1071     {
1072         /* flush any frames left in the decoder */
1073         while ( pv->context && decodeFrame( pv, NULL, 0 ) )
1074         {
1075         }
1076         flushDelayQueue( pv );
1077         hb_list_add( pv->list, in );
1078         *buf_out = link_buf_list( pv );
1079         return HB_WORK_DONE;
1080     }
1081
1082     if ( ! pv->context )
1083     {
1084         init_ffmpeg_context( w );
1085     }
1086
1087     int64_t pts = in->start;
1088     if( pts >= 0 )
1089     {
1090         // use the first timestamp as our 'next expected' pts
1091         if ( pv->pts_next < 0 )
1092         {
1093             pv->pts_next = pts;
1094         }
1095         pv->pts = pts;
1096     }
1097
1098     if ( in->new_chap )
1099     {
1100         pv->new_chap = in->new_chap;
1101         pv->chap_time = pts >= 0? pts : pv->pts_next;
1102     }
1103     prepare_ffmpeg_buffer( in );
1104     decodeFrame( pv, in->data, in->size );
1105     hb_buffer_close( &in );
1106     *buf_out = link_buf_list( pv );
1107     return HB_WORK_OK;
1108 }
1109
1110 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1111 {
1112     if ( decavcodecvInfo( w, info ) )
1113     {
1114         hb_work_private_t *pv = w->private_data;
1115         if ( ! pv->context )
1116         {
1117             init_ffmpeg_context( w );
1118         }
1119         // we have the frame duration in units of the 90KHz pts clock but
1120         // need it in units of the 27MHz MPEG clock. */
1121         info->rate = 27000000;
1122         info->rate_base = pv->duration * 300.;
1123         return 1;
1124     }
1125     return 0;
1126 }
1127
1128 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
1129 {
1130     AVCodecContext *context = pv->context;
1131     int pos = 0;
1132
1133     while ( pos < size )
1134     {
1135         int16_t *buffer = pv->buffer;
1136         if ( buffer == NULL )
1137         {
1138             pv->buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1139             buffer = pv->buffer;
1140         }
1141
1142         AVPacket avp;
1143         av_init_packet( &avp );
1144         avp.data = data + pos;
1145         avp.size = size - pos;
1146
1147         int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1148         int len = avcodec_decode_audio3( context, buffer, &out_size, &avp );
1149         if ( len <= 0 )
1150         {
1151             return;
1152         }
1153         pos += len;
1154         if( out_size > 0 )
1155         {
1156             // We require signed 16-bit ints for the output format. If
1157             // we got something different convert it.
1158             if ( context->sample_fmt != SAMPLE_FMT_S16 )
1159             {
1160                 // Note: av_audio_convert seems to be a work-in-progress but
1161                 //       looks like it will eventually handle general audio
1162                 //       mixdowns which would allow us much more flexibility
1163                 //       in handling multichannel audio in HB. If we were doing
1164                 //       anything more complicated than a one-for-one format
1165                 //       conversion we'd probably want to cache the converter
1166                 //       context in the pv.
1167                 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1168                 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1169                                                               context->sample_fmt, 1,
1170                                                               NULL, 0 );
1171                 // get output buffer size (in 2-byte samples) then malloc a buffer
1172                 out_size = ( out_size * 2 ) / isamp;
1173                 buffer = av_malloc( out_size );
1174
1175                 // we're doing straight sample format conversion which behaves as if
1176                 // there were only one channel.
1177                 const void * const ibuf[6] = { pv->buffer };
1178                 void * const obuf[6] = { buffer };
1179                 const int istride[6] = { isamp };
1180                 const int ostride[6] = { 2 };
1181
1182                 av_audio_convert( ctx, obuf, ostride, ibuf, istride, out_size >> 1 );
1183                 av_audio_convert_free( ctx );
1184             }
1185             hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
1186
1187             // convert from bytes to total samples
1188             out_size >>= 1;
1189
1190             double pts = pv->pts_next;
1191             buf->start = pts;
1192             pts += out_size * pv->duration;
1193             buf->stop  = pts;
1194             pv->pts_next = pts;
1195
1196             float *fl32 = (float *)buf->data;
1197             int i;
1198             for( i = 0; i < out_size; ++i )
1199             {
1200                 fl32[i] = buffer[i];
1201             }
1202             hb_list_add( pv->list, buf );
1203
1204             // if we allocated a buffer for sample format conversion, free it
1205             if ( buffer != pv->buffer )
1206             {
1207                 av_free( buffer );
1208             }
1209         }
1210     }
1211 }
1212
1213 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1214                     hb_buffer_t **buf_out )
1215 {
1216     if ( (*buf_in)->size <= 0 )
1217     {
1218         /* EOF on input stream - send it downstream & say that we're done */
1219         *buf_out = *buf_in;
1220         *buf_in = NULL;
1221         return HB_WORK_DONE;
1222     }
1223
1224     hb_work_private_t *pv = w->private_data;
1225
1226     if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1227     {
1228         // discard buffers that start before video time 0
1229         *buf_out = NULL;
1230         return HB_WORK_OK;
1231     }
1232
1233     if ( ! pv->context )
1234     {
1235         init_ffmpeg_context( w );
1236         // duration is a scaling factor to go from #bytes in the decoded
1237         // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1238         // total samples to per-channel samples. 'sample_rate' converts
1239         // per-channel samples to seconds per sample and the 90000
1240         // is mpeg ticks per second.
1241         pv->duration = 90000. /
1242                     (double)( pv->context->sample_rate * pv->context->channels );
1243     }
1244     hb_buffer_t *in = *buf_in;
1245
1246     // if the packet has a timestamp use it if we don't have a timestamp yet
1247     // or if there's been a timing discontinuity of more than 100ms.
1248     if ( in->start >= 0 &&
1249          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1250     {
1251         pv->pts_next = in->start;
1252     }
1253     prepare_ffmpeg_buffer( in );
1254     decodeAudio( pv, in->data, in->size );
1255     *buf_out = link_buf_list( pv );
1256
1257     return HB_WORK_OK;
1258 }
1259
// Work object for the video decoder that is fed by the ffmpeg stream
// reader. It uses its own init, work and info routines but shares the
// close and bitstream-info routines with the stand-alone video decoder.
hb_work_object_t hb_decavcodecvi =
{
    WORK_DECAVCODECVI,
    "Video decoder (ffmpeg streams)",
    decavcodecviInit,
    decavcodecviWork,
    decavcodecClose,
    decavcodecviInfo,
    decavcodecvBSInfo
};
1270
// Work object for the audio decoder that is fed by the ffmpeg stream
// reader. Note that it reuses decavcodecviInit (the init routine of the
// ffmpeg-stream video decoder) and the decavcodecInfo / decavcodecBSInfo
// routines; only the work routine is specific to it.
hb_work_object_t hb_decavcodecai =
{
    WORK_DECAVCODECAI,
    "Audio decoder (ffmpeg streams)",
    decavcodecviInit,
    decavcodecaiWork,
    decavcodecClose,
    decavcodecInfo,
    decavcodecBSInfo
};