libhb/decavcodec.c

   1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 /* This module is Handbrake's interface to the ffmpeg decoder library
   8    (libavcodec & small parts of libavformat). It contains four Handbrake
   9    "work objects":
  10
  11     decavcodec  connects HB to an ffmpeg audio decoder
  12     decavcodecv connects HB to an ffmpeg video decoder
  13
  14         (Two different routines are needed because the ffmpeg library
  15         has different decoder calling conventions for audio & video.
  16         The audio decoder should have had its name changed to "decavcodeca"
  17         but I got lazy.) These work objects are self-contained & follow all
  18         of HB's conventions for a decoder module. They can be used like
  19         any other HB decoder (deca52, decmpeg2, etc.).
  20
  21     decavcodecai "internal" (incestuous?) version of decavcodec
  22     decavcodecvi "internal" (incestuous?) version of decavcodecv
  23
   24         These routines are functionally equivalent to the routines above but
  25         can only be used by the ffmpeg-based stream reader in libhb/stream.c.
  26         The reason they exist is because the ffmpeg library leaves some of
  27         the information needed by the decoder in the AVStream (the data
  28         structure used by the stream reader) and we need to retrieve it
  29         to successfully decode frames. But in HB the reader and decoder
  30         modules are in completely separate threads and nothing goes between
  31         them but hb_buffers containing frames to be decoded. I.e., there's
  32         no easy way for the ffmpeg stream reader to pass a pointer to its
  33         AVStream over to the ffmpeg video or audio decoder. So the *i work
  34         objects use a private back door to the stream reader to get access
  35         to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
  36         and the codec_param passed to these work objects is the key to this
  37         back door (it's basically an index that allows the correct AVStream
  38         to be retrieved).
  39
  40     The normal & *i objects share a lot of code (the basic frame decoding
  41     and bitstream info code is factored out into subroutines that can be
  42     called by either) but the top level routines of the *i objects
  43     (decavcodecviWork, decavcodecviInfo, etc.) are different because:
  44      1) they *have* to use the AVCodecContext that's contained in the
  45         reader's AVStream rather than just allocating & using their own,
  46      2) the Info routines have access to stuff kept in the AVStream in addition
  47         to stuff kept in the AVCodecContext. This shouldn't be necessary but
  48         crucial information like video frame rate that should be in the
  49         AVCodecContext is either missing or wrong in the version of ffmpeg
  50         we're currently using.
  51
  52     A consequence of the above is that the non-i work objects *can't* use
  53     information from the AVStream because there isn't one - they get their
  54     data from either the dvd reader or the mpeg reader, not the ffmpeg stream
  55     reader. That means that they have to make up for deficiencies in the
  56     AVCodecContext info by using stuff kept in the HB "title" struct. It
  57     also means that ffmpeg codecs that randomly scatter state needed by
  58     the decoder across both the AVCodecContext & the AVStream (e.g., the
  59     VC1 decoder) can't easily be used by the HB mpeg stream reader.
  60  */
  61
  62 #include "hb.h"
  63 #include "hbffmpeg.h"
  64 #include "libavcodec/audioconvert.h"
  65
  66 static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
  67 static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  68 static void decavcodecClose( hb_work_object_t * );
  69 static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
  70 static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );
  71
  72 hb_work_object_t hb_decavcodec =
  73 {
  74     WORK_DECAVCODEC,
  75     "MPGA decoder (libavcodec)",
  76     decavcodecInit,
  77     decavcodecWork,
  78     decavcodecClose,
  79     decavcodecInfo,
  80     decavcodecBSInfo
  81 };
  82
  83 #define HEAP_SIZE 8
  84 typedef struct {
  85     // there are nheap items on the heap indexed 1..nheap (i.e., top of
  86     // heap is 1). The 0th slot is unused - a marker is put there to check
  87     // for overwrite errs.
  88     int64_t h[HEAP_SIZE+1];
  89     int     nheap;
  90 } pts_heap_t;
  91
  92 struct hb_work_private_s
  93 {
  94     hb_job_t        *job;
  95     AVCodecContext  *context;
  96     AVCodecParserContext *parser;
  97     hb_list_t       *list;
  98     double          duration;   // frame duration (for video)
  99     double          pts_next;   // next pts we expect to generate
 100     int64_t         pts;        // (video) pts passing from parser to decoder
 101     int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
 102     int             new_chap;   // output chapter mark pending
 103     uint32_t        nframes;
 104     uint32_t        ndrops;
 105     uint32_t        decode_errors;
 106     int             brokenByMicrosoft; // video stream may contain packed b-frames
 107     hb_buffer_t*    delayq[HEAP_SIZE];
 108     pts_heap_t      pts_heap;
 109     void*           buffer;
 110     struct SwsContext *sws_context; // if we have to rescale or convert color space
 111 };
 112
 113 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size );
 114 static hb_buffer_t *link_buf_list( hb_work_private_t *pv );
 115
 116
 117 static int64_t heap_pop( pts_heap_t *heap )
 118 {
 119     int64_t result;
 120
 121     if ( heap->nheap <= 0 )
 122     {
 123         return -1;
 124     }
 125
 126     // return the top of the heap then put the bottom element on top,
 127     // decrease the heap size by one & rebalence the heap.
 128     result = heap->h[1];
 129
 130     int64_t v = heap->h[heap->nheap--];
 131     int parent = 1;
 132     int child = parent << 1;
 133     while ( child <= heap->nheap )
 134     {
 135         // find the smallest of the two children of parent
 136         if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
 137             ++child;
 138
 139         if (v <= heap->h[child])
 140             // new item is smaller than either child so it's the new parent.
 141             break;
 142
 143         // smallest child is smaller than new item so move it up then
 144         // check its children.
 145         int64_t hp = heap->h[child];
 146         heap->h[parent] = hp;
 147         parent = child;
 148         child = parent << 1;
 149     }
 150     heap->h[parent] = v;
 151     return result;
 152 }
 153
 154 static void heap_push( pts_heap_t *heap, int64_t v )
 155 {
 156     if ( heap->nheap < HEAP_SIZE )
 157     {
 158         ++heap->nheap;
 159     }
 160
 161     // stick the new value on the bottom of the heap then bubble it
 162     // up to its correct spot.
 163         int child = heap->nheap;
 164         while (child > 1) {
 165                 int parent = child >> 1;
 166                 if (heap->h[parent] <= v)
 167                         break;
 168                 // move parent down
 169                 int64_t hp = heap->h[parent];
 170                 heap->h[child] = hp;
 171                 child = parent;
 172         }
 173         heap->h[child] = v;
 174 }
 175
 176
 177 /***********************************************************************
 178  * hb_work_decavcodec_init
 179  ***********************************************************************
 180  *
 181  **********************************************************************/
 182 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 183 {
 184     AVCodec * codec;
 185
 186     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 187     w->private_data = pv;
 188
 189     pv->job   = job;
 190     pv->list  = hb_list_init();
 191
 192     int codec_id = w->codec_param;
 193     /*XXX*/
 194     if ( codec_id == 0 )
 195         codec_id = CODEC_ID_MP2;
 196
 197     codec = avcodec_find_decoder( codec_id );
 198     pv->parser = av_parser_init( codec_id );
 199
 200     pv->context = avcodec_alloc_context();
 201     hb_avcodec_open( pv->context, codec );
 202
 203     return 0;
 204 }
 205
 206 /***********************************************************************
 207  * Close
 208  ***********************************************************************
 209  *
 210  **********************************************************************/
 211 static void decavcodecClose( hb_work_object_t * w )
 212 {
 213     hb_work_private_t * pv = w->private_data;
 214
 215     if ( pv )
 216     {
 217         if ( pv->job && pv->context && pv->context->codec )
 218         {
 219             hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
 220                     pv->context->codec->name, pv->nframes, pv->decode_errors,
 221                     pv->ndrops );
 222         }
 223         if ( pv->sws_context )
 224         {
 225             sws_freeContext( pv->sws_context );
 226         }
 227         if ( pv->parser )
 228         {
 229             av_parser_close(pv->parser);
 230         }
 231         if ( pv->context && pv->context->codec )
 232         {
 233             hb_avcodec_close( pv->context );
 234         }
 235         if ( pv->list )
 236         {
 237             hb_list_close( &pv->list );
 238         }
 239         if ( pv->buffer )
 240         {
 241             av_free( pv->buffer );
 242             pv->buffer = NULL;
 243         }
 244         free( pv );
 245         w->private_data = NULL;
 246     }
 247 }
 248
 249 /***********************************************************************
 250  * Work
 251  ***********************************************************************
 252  *
 253  **********************************************************************/
 254 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 255                     hb_buffer_t ** buf_out )
 256 {
 257     hb_work_private_t * pv = w->private_data;
 258     hb_buffer_t * in = *buf_in;
 259
 260     if ( in->size <= 0 )
 261     {
 262         /* EOF on input stream - send it downstream & say that we're done */
 263         *buf_out = in;
 264         *buf_in = NULL;
 265         return HB_WORK_DONE;
 266     }
 267
 268     *buf_out = NULL;
 269
 270     if ( in->start < -1 && pv->pts_next <= 0 )
 271     {
 272         // discard buffers that start before video time 0
 273         return HB_WORK_OK;
 274     }
 275
 276     // if the packet has a timestamp use it
 277     if ( in->start != -1 )
 278     {
 279         pv->pts_next = in->start;
 280     }
 281
 282     int pos, len;
 283     for ( pos = 0; pos < in->size; pos += len )
 284     {
 285         uint8_t *parser_output_buffer;
 286         int parser_output_buffer_len;
 287         int64_t cur = pv->pts_next;
 288
 289         len = av_parser_parse2( pv->parser, pv->context,
 290                                 &parser_output_buffer, &parser_output_buffer_len,
 291                                 in->data + pos, in->size - pos, cur, cur, AV_NOPTS_VALUE );
 292         if (parser_output_buffer_len)
 293         {
 294             // set the duration on every frame since the stream format can
 295             // change (it shouldn't but there's no way to guarantee it).
 296             // duration is a scaling factor to go from #bytes in the decoded
 297             // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
 298             // total samples to per-channel samples. 'sample_rate' converts
 299             // per-channel samples to seconds per sample and the 90000
 300             // is mpeg ticks per second.
 301             if ( pv->context->sample_rate && pv->context->channels )
 302             {
 303                 pv->duration = 90000. /
 304                             (double)( pv->context->sample_rate * pv->context->channels );
 305             }
 306             decodeAudio( pv, parser_output_buffer, parser_output_buffer_len );
 307         }
 308     }
 309     *buf_out = link_buf_list( pv );
 310     return HB_WORK_OK;
 311 }
 312
 313 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
 314 {
 315     hb_work_private_t *pv = w->private_data;
 316
 317     memset( info, 0, sizeof(*info) );
 318
 319     if ( pv && pv->context )
 320     {
 321         AVCodecContext *context = pv->context;
 322         info->bitrate = context->bit_rate;
 323         info->rate = context->time_base.num;
 324         info->rate_base = context->time_base.den;
 325         info->profile = context->profile;
 326         info->level = context->level;
 327         return 1;
 328     }
 329     return 0;
 330 }
 331
 332 static const int chan2layout[] = {
 333     HB_INPUT_CH_LAYOUT_MONO,  // We should allow no audio really.
 334     HB_INPUT_CH_LAYOUT_MONO,
 335     HB_INPUT_CH_LAYOUT_STEREO,
 336     HB_INPUT_CH_LAYOUT_2F1R,
 337     HB_INPUT_CH_LAYOUT_2F2R,
 338     HB_INPUT_CH_LAYOUT_3F2R,
 339     HB_INPUT_CH_LAYOUT_4F2R,
 340     HB_INPUT_CH_LAYOUT_STEREO,
 341     HB_INPUT_CH_LAYOUT_STEREO,
 342 };
 343
 344 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 345                              hb_work_info_t *info )
 346 {
 347     hb_work_private_t *pv = w->private_data;
 348     int ret = 0;
 349
 350     memset( info, 0, sizeof(*info) );
 351
 352     if ( pv && pv->context )
 353     {
 354         return decavcodecInfo( w, info );
 355     }
 356     // XXX
 357     // We should parse the bitstream to find its parameters but for right
 358     // now we just return dummy values if there's a codec that will handle it.
 359     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
 360                                                            CODEC_ID_MP2 );
 361     if ( ! codec )
 362     {
 363         // there's no ffmpeg codec for this audio type - give up
 364         return -1;
 365     }
 366
 367     static char codec_name[64];
 368     info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
 369
 370     AVCodecParserContext *parser = av_parser_init( codec->id );
 371     AVCodecContext *context = avcodec_alloc_context();
 372     hb_avcodec_open( context, codec );
 373     uint8_t *buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 374     int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 375     unsigned char *pbuffer;
 376     int pos = 0, pbuffer_size;
 377
 378     while ( pos < buf->size )
 379     {
 380         int len = av_parser_parse2( parser, context, &pbuffer, &pbuffer_size,
 381                                     buf->data + pos, buf->size - pos,
 382                                     buf->start, buf->start, AV_NOPTS_VALUE );
 383         pos += len;
 384         if ( pbuffer_size > 0 )
 385         {
 386             AVPacket avp;
 387             av_init_packet( &avp );
 388             avp.data = pbuffer;
 389             avp.size = pbuffer_size;
 390
 391             len = avcodec_decode_audio3( context, (int16_t*)buffer, &out_size, &avp );
 392             if ( len > 0 && context->sample_rate > 0 )
 393             {
 394                 info->bitrate = context->bit_rate;
 395                 info->rate = context->sample_rate;
 396                 info->rate_base = 1;
 397                 info->channel_layout = chan2layout[context->channels & 7];
 398                 ret = 1;
 399                 break;
 400             }
 401         }
 402     }
 403     av_free( buffer );
 404     av_parser_close( parser );
 405     hb_avcodec_close( context );
 406     return ret;
 407 }
 408
 409 /* -------------------------------------------------------------
 410  * General purpose video decoder using libavcodec
 411  */
 412
 413 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
 414                             int h )
 415 {
 416     if ( dstride == sstride )
 417     {
 418         memcpy( dst, src, dstride * h );
 419         return dst + dstride * h;
 420     }
 421     int lbytes = dstride <= sstride? dstride : sstride;
 422     while ( --h >= 0 )
 423     {
 424         memcpy( dst, src, lbytes );
 425         src += sstride;
 426         dst += dstride;
 427     }
 428     return dst;
 429 }
 430
 431 // copy one video frame into an HB buf. If the frame isn't in our color space
 432 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
 433 // Otherwise just copy the bits.
 434 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 435 {
 436     AVCodecContext *context = pv->context;
 437     int w, h;
 438     if ( ! pv->job )
 439     {
 440         // if the dimensions are odd, drop the lsb since h264 requires that
 441         // both width and height be even.
 442         w = ( context->width >> 1 ) << 1;
 443         h = ( context->height >> 1 ) << 1;
 444     }
 445     else
 446     {
 447         w =  pv->job->title->width;
 448         h =  pv->job->title->height;
 449     }
 450     hb_buffer_t *buf = hb_video_buffer_init( w, h );
 451     uint8_t *dst = buf->data;
 452
 453     if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
 454          h != context->height )
 455     {
 456         // have to convert to our internal color space and/or rescale
 457         AVPicture dstpic;
 458         avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 459
 460         if ( ! pv->sws_context )
 461         {
 462             pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
 463                                               w, h, PIX_FMT_YUV420P,
 464                                               SWS_LANCZOS|SWS_ACCURATE_RND,
 465                                               NULL, NULL, NULL );
 466         }
 467         sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
 468                    dstpic.data, dstpic.linesize );
 469     }
 470     else
 471     {
 472         dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
 473         w = (w + 1) >> 1; h = (h + 1) >> 1;
 474         dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
 475         dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
 476     }
 477     return buf;
 478 }
 479
 480 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
 481 {
 482     hb_work_private_t *pv = context->opaque;
 483     frame->pts = pv->pts;
 484     pv->pts = -1;
 485     return avcodec_default_get_buffer( context, frame );
 486 }
 487
 488 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 489 {
 490     hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
 491     if ( c && c->title )
 492     {
 493         hb_log( "%s: \"%s\" (%d) at frame %u time %"PRId64,
 494                 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
 495     }
 496     else
 497     {
 498         hb_log( "%s: Chapter %d at frame %u time %"PRId64,
 499                 pv->context->codec->name, chap_num, pv->nframes, pts );
 500     }
 501 }
 502
 503 static void flushDelayQueue( hb_work_private_t *pv )
 504 {
 505     hb_buffer_t *buf;
 506     int slot = pv->nframes & (HEAP_SIZE-1);
 507
 508     // flush all the video packets left on our timestamp-reordering delay q
 509     while ( ( buf = pv->delayq[slot] ) != NULL )
 510     {
 511         buf->start = heap_pop( &pv->pts_heap );
 512         hb_list_add( pv->list, buf );
 513         pv->delayq[slot] = NULL;
 514         slot = ( slot + 1 ) & (HEAP_SIZE-1);
 515     }
 516 }
 517
 518 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
 519 {
 520     int got_picture, oldlevel = 0;
 521     AVFrame frame;
 522     AVPacket avp;
 523
 524     if ( global_verbosity_level <= 1 )
 525     {
 526         oldlevel = av_log_get_level();
 527         av_log_set_level( AV_LOG_QUIET );
 528     }
 529
 530     av_init_packet( &avp );
 531     avp.data = data;
 532     avp.size = size;
 533     if ( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 )
 534     {
 535         ++pv->decode_errors;
 536     }
 537     if ( global_verbosity_level <= 1 )
 538     {
 539         av_log_set_level( oldlevel );
 540     }
 541     if( got_picture )
 542     {
 543         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
 544         // packet had a pts we handed it to av_parser_parse (if the packet had
 545         // no pts we set it to -1 but before the parse we can't distinguish between
 546         // the start of a video frame with no pts & an intermediate packet of
 547         // some frame which never has a pts). we hope that when parse returns
 548         // the frame to us the pts we originally handed it will be in parser->pts.
 549         // we put this pts into pv->pts so that when a avcodec_decode_video
 550         // finally gets around to allocating an AVFrame to hold the decoded
 551         // frame we can stuff that pts into the frame. if all of these relays
 552         // worked at this point frame.pts should hold the frame's pts from the
 553         // original data stream or -1 if it didn't have one. in the latter case
 554         // we generate the next pts in sequence for it.
 555         double frame_dur = pv->duration;
 556         if ( frame_dur <= 0 )
 557         {
 558             frame_dur = 90000. * (double)pv->context->time_base.num /
 559                         (double)pv->context->time_base.den;
 560             pv->duration = frame_dur;
 561         }
 562         if ( frame.repeat_pict )
 563         {
 564             frame_dur += frame.repeat_pict * frame_dur * 0.5;
 565         }
 566         // XXX Unlike every other video decoder, the Raw decoder doesn't
 567         //     use the standard buffer allocation routines so we never
 568         //     get to put a PTS in the frame. Do it now.
 569         if ( pv->context->codec_id == CODEC_ID_RAWVIDEO )
 570         {
 571             frame.pts = pv->pts;
 572             pv->pts = -1;
 573         }
 574         // If there was no pts for this frame, assume constant frame rate
 575         // video & estimate the next frame time from the last & duration.
 576         double pts = frame.pts;
 577         if ( pts < 0 )
 578         {
 579             pts = pv->pts_next;
 580         }
 581         pv->pts_next = pts + frame_dur;
 582
 583         hb_buffer_t *buf;
 584
 585         // if we're doing a scan or this content couldn't have been broken
 586         // by Microsoft we don't worry about timestamp reordering
 587         if ( ! pv->job || ! pv->brokenByMicrosoft )
 588         {
 589             buf = copy_frame( pv, &frame );
 590             buf->start = pts;
 591             hb_list_add( pv->list, buf );
 592             ++pv->nframes;
 593             return got_picture;
 594         }
 595
 596         // XXX This following probably addresses a libavcodec bug but I don't
 597         //     see an easy fix so we workaround it here.
 598         //
 599         // The M$ 'packed B-frames' atrocity results in decoded frames with
 600         // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
 601         // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
 602         // The frames are actually delivered in the right order but with
 603         // the wrong timestamp. To get the correct timestamp attached to
 604         // each frame we have a delay queue (longer than the max number of
 605         // b-frames) & a sorting heap for the timestamps. As each frame
 606         // comes out of the decoder the oldest frame in the queue is removed
 607         // and associated with the smallest timestamp. Then the new frame is
 608         // added to the queue & its timestamp is pushed on the heap.
 609         // This does nothing if the timestamps are correct (i.e., the video
 610         // uses a codec that Micro$oft hasn't broken yet) but the frames
 611         // get timestamped correctly even when M$ has munged them.
 612
 613         // remove the oldest picture from the frame queue (if any) &
 614         // give it the smallest timestamp from our heap. The queue size
 615         // is a power of two so we get the slot of the oldest by masking
 616         // the frame count & this will become the slot of the newest
 617         // once we've removed & processed the oldest.
 618         int slot = pv->nframes & (HEAP_SIZE-1);
 619         if ( ( buf = pv->delayq[slot] ) != NULL )
 620         {
 621             buf->start = heap_pop( &pv->pts_heap );
 622
 623             if ( pv->new_chap && buf->start >= pv->chap_time )
 624             {
 625                 buf->new_chap = pv->new_chap;
 626                 pv->new_chap = 0;
 627                 pv->chap_time = 0;
 628                 log_chapter( pv, buf->new_chap, buf->start );
 629             }
 630             else if ( pv->nframes == 0 )
 631             {
 632                 log_chapter( pv, pv->job->chapter_start, buf->start );
 633             }
 634             hb_list_add( pv->list, buf );
 635         }
 636
 637         // add the new frame to the delayq & push its timestamp on the heap
 638         pv->delayq[slot] = copy_frame( pv, &frame );
 639         heap_push( &pv->pts_heap, pts );
 640
 641         ++pv->nframes;
 642     }
 643
 644     return got_picture;
 645 }
 646
 647 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
 648                          int64_t pts, int64_t dts )
 649 {
 650     /*
 651      * The following loop is a do..while because we need to handle both
 652      * data & the flush at the end (signaled by size=0). At the end there's
 653      * generally a frame in the parser & one or more frames in the decoder
 654      * (depending on the bframes setting).
 655      */
 656     int pos = 0;
 657     do {
 658         uint8_t *pout;
 659         int pout_len;
 660         int len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len,
 661                                     data + pos, size - pos, pts, dts, AV_NOPTS_VALUE );
 662         pos += len;
 663
 664         if ( pout_len > 0 )
 665         {
 666             pv->pts = pv->parser->pts;
 667             decodeFrame( pv, pout, pout_len );
 668         }
 669     } while ( pos < size );
 670
 671     /* the stuff above flushed the parser, now flush the decoder */
 672     if ( size <= 0 )
 673     {
 674         while ( decodeFrame( pv, NULL, 0 ) )
 675         {
 676         }
 677         flushDelayQueue( pv );
 678     }
 679 }
 680
 681 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 682 {
 683     hb_buffer_t *head = hb_list_item( pv->list, 0 );
 684
 685     if ( head )
 686     {
 687         hb_list_rem( pv->list, head );
 688
 689         hb_buffer_t *last = head, *buf;
 690
 691         while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
 692         {
 693             hb_list_rem( pv->list, buf );
 694             last->next = buf;
 695             last = buf;
 696         }
 697     }
 698     return head;
 699 }
 700
 701
 702 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
 703 {
 704
 705     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
 706     w->private_data = pv;
 707     pv->job   = job;
 708     pv->list = hb_list_init();
 709
 710     int codec_id = w->codec_param;
 711     pv->parser = av_parser_init( codec_id );
 712     pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
 713
 714     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
 715     pv->context->opaque = pv;
 716     pv->context->get_buffer = get_frame_buf;
 717
 718     return 0;
 719 }
 720
 721 static int next_hdr( hb_buffer_t *in, int offset )
 722 {
 723     uint8_t *dat = in->data;
 724     uint16_t last2 = 0xffff;
 725     for ( ; in->size - offset > 1; ++offset )
 726     {
 727         if ( last2 == 0 && dat[offset] == 0x01 )
 728             // found an mpeg start code
 729             return offset - 2;
 730
 731         last2 = ( last2 << 8 ) | dat[offset];
 732     }
 733
 734     return -1;
 735 }
 736
 737 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
 738 {
 739     if ( in->size - offset < 4 )
 740         // not enough room for an mpeg start code
 741         return -1;
 742
 743     for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
 744     {
 745         if ( in->data[offset+3] == hdr_type )
 746             // found it
 747             break;
 748     }
 749     return offset;
 750 }
 751
 752 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
 753 {
 754     hb_work_private_t *pv = w->private_data;
 755
 756     // we can't call the avstream funcs but the read_header func in the
 757     // AVInputFormat may set up some state in the AVContext. In particular
 758     // vc1t_read_header allocates 'extradata' to deal with header issues
 759     // related to Microsoft's bizarre engineering notions. We alloc a chunk
 760     // of space to make vc1 work then associate the codec with the context.
 761     if ( w->codec_param != CODEC_ID_VC1 )
 762     {
 763         // we haven't been inflicted with M$ - allocate a little space as
 764         // a marker and return success.
 765         pv->context->extradata_size = 16;
 766         pv->context->extradata = av_malloc(pv->context->extradata_size);
 767         return 0;
 768     }
 769
 770     // find the start and and of the sequence header
 771     int shdr, shdr_end;
 772     if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
 773     {
 774         // didn't find start of seq hdr
 775         return 1;
 776     }
 777     if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
 778     {
 779         shdr_end = in->size;
 780     }
 781     shdr_end -= shdr;
 782
 783     // find the start and and of the entry point header
 784     int ehdr, ehdr_end;
 785     if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
 786     {
 787         // didn't find start of entry point hdr
 788         return 1;
 789     }
 790     if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
 791     {
 792         ehdr_end = in->size;
 793     }
 794     ehdr_end -= ehdr;
 795
 796     // found both headers - allocate an extradata big enough to hold both
 797     // then copy them into it.
 798     pv->context->extradata_size = shdr_end + ehdr_end;
 799     pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
 800     memcpy( pv->context->extradata, in->data + shdr, shdr_end );
 801     memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
 802     memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
 803     return 0;
 804 }
 805
 806 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 807                             hb_buffer_t ** buf_out )
 808 {
 809     hb_work_private_t *pv = w->private_data;
 810     hb_buffer_t *in = *buf_in;
 811     int64_t pts = AV_NOPTS_VALUE;
 812     int64_t dts = pts;
 813
 814     *buf_in = NULL;
 815
 816     /* if we got an empty buffer signaling end-of-stream send it downstream */
 817     if ( in->size == 0 )
 818     {
 819         decodeVideo( pv, in->data, in->size, pts, dts );
 820         hb_list_add( pv->list, in );
 821         *buf_out = link_buf_list( pv );
 822         return HB_WORK_DONE;
 823     }
 824
 825     // if this is the first frame open the codec (we have to wait for the
 826     // first frame because of M$ VC1 braindamage).
 827     if ( pv->context->extradata_size == 0 )
 828     {
 829         if ( setup_extradata( w, in ) )
 830         {
 831             // we didn't find the headers needed to set up extradata.
 832             // the codec will abort if we open it so just free the buf
 833             // and hope we eventually get the info we need.
 834             hb_buffer_close( &in );
 835             return HB_WORK_OK;
 836         }
 837         AVCodec *codec = avcodec_find_decoder( w->codec_param );
 838         // There's a mis-feature in ffmpeg that causes the context to be
 839         // incorrectly initialized the 1st time avcodec_open is called.
 840         // If you close it and open a 2nd time, it finishes the job.
 841         hb_avcodec_open( pv->context, codec );
 842         hb_avcodec_close( pv->context );
 843         hb_avcodec_open( pv->context, codec );
 844     }
 845
 846     if( in->start >= 0 )
 847     {
 848         pts = in->start;
 849         dts = in->renderOffset;
 850     }
 851     if ( in->new_chap )
 852     {
 853         pv->new_chap = in->new_chap;
 854         pv->chap_time = pts >= 0? pts : pv->pts_next;
 855     }
 856     decodeVideo( pv, in->data, in->size, pts, dts );
 857     hb_buffer_close( &in );
 858     *buf_out = link_buf_list( pv );
 859     return HB_WORK_OK;
 860 }
 861
 862 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
 863 {
 864     hb_work_private_t *pv = w->private_data;
 865
 866     memset( info, 0, sizeof(*info) );
 867
 868     if ( pv && pv->context )
 869     {
 870         AVCodecContext *context = pv->context;
 871         info->bitrate = context->bit_rate;
 872         info->width = context->width;
 873         info->height = context->height;
 874
 875         /* ffmpeg gives the frame rate in frames per second while HB wants
 876          * it in units of the 27MHz MPEG clock. */
 877         info->rate = 27000000;
 878         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
 879                           context->time_base.den;
 880         if ( context->ticks_per_frame > 1 )
 881         {
 882             // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
 883             // field rate rather than frame rate so convert back to frames.
 884             info->rate_base *= context->ticks_per_frame;
 885         }
 886
 887         info->pixel_aspect_width = context->sample_aspect_ratio.num;
 888         info->pixel_aspect_height = context->sample_aspect_ratio.den;
 889
 890         /* Sometimes there's no pixel aspect set in the source ffmpeg context
 891          * which appears to come from the video stream. In that case,
 892          * try the pixel aspect in AVStream (which appears to come from
 893          * the container). Else assume a 1:1 PAR. */
 894         if ( info->pixel_aspect_width == 0 ||
 895              info->pixel_aspect_height == 0 )
 896         {
 897             AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 898             info->pixel_aspect_width = st->sample_aspect_ratio.num ?
 899                                         st->sample_aspect_ratio.num : 1;
 900             info->pixel_aspect_height = st->sample_aspect_ratio.den ?
 901                                         st->sample_aspect_ratio.den : 1;
 902         }
 903         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
 904          * Display Aspect Ratio so we convert by scaling by the Storage
 905          * Aspect Ratio (w/h). We do the calc in floating point to get the
 906          * rounding right. */
 907         info->aspect = (double)info->pixel_aspect_width *
 908                        (double)context->width /
 909                        (double)info->pixel_aspect_height /
 910                        (double)context->height;
 911
 912         info->profile = context->profile;
 913         info->level = context->level;
 914         info->name = context->codec->name;
 915         return 1;
 916     }
 917     return 0;
 918 }
 919
 920 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 921                              hb_work_info_t *info )
 922 {
 923     return 0;
 924 }
 925
 926 hb_work_object_t hb_decavcodecv =
 927 {
 928     WORK_DECAVCODECV,
 929     "Video decoder (libavcodec)",
 930     decavcodecvInit,
 931     decavcodecvWork,
 932     decavcodecClose,
 933     decavcodecvInfo,
 934     decavcodecvBSInfo
 935 };
 936
 937
 938 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
 939 // includes a parser and passes information from the parser to the decoder
 940 // via a codec context kept in the AVStream of the reader's AVFormatContext.
 941 // We *have* to use that codec context to decode the stream or we'll get
 942 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
 943 // codec context in our codec_param.
 944
 945 // this routine gets the appropriate context pointer from the ffmpeg
 946 // stream reader. it can't be called until we get the first buffer because
 947 // we can't guarantee that reader will be called before the our init
 948 // routine and if our init is called first we'll get a pointer to the
 949 // old scan stream (which has already been closed).
 950 static void init_ffmpeg_context( hb_work_object_t *w )
 951 {
 952     hb_work_private_t *pv = w->private_data;
 953     pv->context = hb_ffmpeg_context( w->codec_param );
 954
 955     // during scan the decoder gets closed & reopened which will
 956     // close the codec so reopen it if it's not there
 957     if ( ! pv->context->codec )
 958     {
 959         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
 960         hb_avcodec_open( pv->context, codec );
 961     }
 962     // set up our best guess at the frame duration.
 963     // the frame rate in the codec is usually bogus but it's sometimes
 964     // ok in the stream.
 965     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 966
 967     if ( st->nb_frames && st->duration )
 968     {
 969         // compute the average frame duration from the total number
 970         // of frames & the total duration.
 971         pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
 972                        ( (double)st->nb_frames * (double)st->time_base.den );
 973     }
 974     else
 975     {
 976         // XXX We don't have a frame count or duration so try to use the
 977         // far less reliable time base info in the stream.
 978         // Because the time bases are so screwed up, we only take values
 979         // in the range 8fps - 64fps.
 980         AVRational tb;
 981         if ( st->time_base.num * 64 > st->time_base.den &&
 982              st->time_base.den > st->time_base.num * 8 )
 983         {
 984             tb = st->time_base;
 985         }
 986         else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
 987                   st->r_frame_rate.num > st->r_frame_rate.den * 8 )
 988         {
 989             tb.num = st->r_frame_rate.den;
 990             tb.den = st->r_frame_rate.num;
 991         }
 992         else
 993         {
 994             tb.num = 1001;  /*XXX*/
 995             tb.den = 24000; /*XXX*/
 996         }
 997         pv->duration =  (double)tb.num / (double)tb.den;
 998     }
 999     pv->duration *= 90000.;
1000
1001     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
1002     pv->context->opaque = pv;
1003     pv->context->get_buffer = get_frame_buf;
1004
1005     // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1006     // b-frames abortion that messes up frame ordering and timestamps.
1007     // XXX ffmpeg knows which streams are broken but doesn't expose the
1008     //     info externally. We should patch ffmpeg to add a flag to the
1009     //     codec context for this but until then we mark all ffmpeg streams
1010     //     as suspicious.
1011     pv->brokenByMicrosoft = 1;
1012 }
1013
1014 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1015 {
1016     // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1017     // will seg fault in odd, data dependent ways if it's not there. (my guess
1018     // is this is a case of a local performance optimization creating a global
1019     // performance degradation since all the time wasted by extraneous data
1020     // copies & memory zeroing has to be huge compared to the minor reduction
1021     // in inner-loop instructions this affords - modern cpus bottleneck on
1022     // memory bandwidth not instruction bandwidth).
1023     if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1024     {
1025         // have to realloc to add the padding
1026         hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
1027     }
1028     memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
1029 }
1030
1031 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
1032 {
1033
1034     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1035     w->private_data = pv;
1036     pv->job   = job;
1037     pv->list = hb_list_init();
1038     pv->pts_next = -1;
1039     pv->pts = -1;
1040     return 0;
1041 }
1042
1043 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1044                              hb_buffer_t ** buf_out )
1045 {
1046     hb_work_private_t *pv = w->private_data;
1047     hb_buffer_t *in = *buf_in;
1048     *buf_in = NULL;
1049
1050     /* if we got an empty buffer signaling end-of-stream send it downstream */
1051     if ( in->size == 0 )
1052     {
1053         /* flush any frames left in the decoder */
1054         while ( pv->context && decodeFrame( pv, NULL, 0 ) )
1055         {
1056         }
1057         flushDelayQueue( pv );
1058         hb_list_add( pv->list, in );
1059         *buf_out = link_buf_list( pv );
1060         return HB_WORK_DONE;
1061     }
1062
1063     if ( ! pv->context )
1064     {
1065         init_ffmpeg_context( w );
1066     }
1067
1068     int64_t pts = in->start;
1069     if( pts >= 0 )
1070     {
1071         // use the first timestamp as our 'next expected' pts
1072         if ( pv->pts_next < 0 )
1073         {
1074             pv->pts_next = pts;
1075         }
1076         pv->pts = pts;
1077     }
1078
1079     if ( in->new_chap )
1080     {
1081         pv->new_chap = in->new_chap;
1082         pv->chap_time = pts >= 0? pts : pv->pts_next;
1083     }
1084     prepare_ffmpeg_buffer( in );
1085     decodeFrame( pv, in->data, in->size );
1086     hb_buffer_close( &in );
1087     *buf_out = link_buf_list( pv );
1088     return HB_WORK_OK;
1089 }
1090
1091 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1092 {
1093     if ( decavcodecvInfo( w, info ) )
1094     {
1095         hb_work_private_t *pv = w->private_data;
1096         if ( ! pv->context )
1097         {
1098             init_ffmpeg_context( w );
1099         }
1100         // we have the frame duration in units of the 90KHz pts clock but
1101         // need it in units of the 27MHz MPEG clock. */
1102         info->rate = 27000000;
1103         info->rate_base = pv->duration * 300.;
1104         return 1;
1105     }
1106     return 0;
1107 }
1108
1109 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
1110 {
1111     AVCodecContext *context = pv->context;
1112     int pos = 0;
1113
1114     while ( pos < size )
1115     {
1116         int16_t *buffer = pv->buffer;
1117         if ( buffer == NULL )
1118         {
1119             pv->buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1120             buffer = pv->buffer;
1121         }
1122
1123         AVPacket avp;
1124         av_init_packet( &avp );
1125         avp.data = data + pos;
1126         avp.size = size - pos;
1127
1128         int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1129         int len = avcodec_decode_audio3( context, buffer, &out_size, &avp );
1130         if ( len <= 0 )
1131         {
1132             return;
1133         }
1134         pos += len;
1135         if( out_size > 0 )
1136         {
1137             // We require signed 16-bit ints for the output format. If
1138             // we got something different convert it.
1139             if ( context->sample_fmt != SAMPLE_FMT_S16 )
1140             {
1141                 // Note: av_audio_convert seems to be a work-in-progress but
1142                 //       looks like it will eventually handle general audio
1143                 //       mixdowns which would allow us much more flexibility
1144                 //       in handling multichannel audio in HB. If we were doing
1145                 //       anything more complicated than a one-for-one format
1146                 //       conversion we'd probably want to cache the converter
1147                 //       context in the pv.
1148                 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1149                 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1150                                                               context->sample_fmt, 1,
1151                                                               NULL, 0 );
1152                 // get output buffer size (in 2-byte samples) then malloc a buffer
1153                 out_size = ( out_size * 2 ) / isamp;
1154                 buffer = av_malloc( out_size );
1155
1156                 // we're doing straight sample format conversion which behaves as if
1157                 // there were only one channel.
1158                 const void * const ibuf[6] = { pv->buffer };
1159                 void * const obuf[6] = { buffer };
1160                 const int istride[6] = { isamp };
1161                 const int ostride[6] = { 2 };
1162
1163                 av_audio_convert( ctx, obuf, ostride, ibuf, istride, out_size >> 1 );
1164                 av_audio_convert_free( ctx );
1165             }
1166             hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
1167
1168             // convert from bytes to total samples
1169             out_size >>= 1;
1170
1171             double pts = pv->pts_next;
1172             buf->start = pts;
1173             pts += out_size * pv->duration;
1174             buf->stop  = pts;
1175             pv->pts_next = pts;
1176
1177             float *fl32 = (float *)buf->data;
1178             int i;
1179             for( i = 0; i < out_size; ++i )
1180             {
1181                 fl32[i] = buffer[i];
1182             }
1183             hb_list_add( pv->list, buf );
1184
1185             // if we allocated a buffer for sample format conversion, free it
1186             if ( buffer != pv->buffer )
1187             {
1188                 av_free( buffer );
1189             }
1190         }
1191     }
1192 }
1193
1194 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1195                     hb_buffer_t **buf_out )
1196 {
1197     if ( (*buf_in)->size <= 0 )
1198     {
1199         /* EOF on input stream - send it downstream & say that we're done */
1200         *buf_out = *buf_in;
1201         *buf_in = NULL;
1202         return HB_WORK_DONE;
1203     }
1204
1205     hb_work_private_t *pv = w->private_data;
1206
1207     if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1208     {
1209         // discard buffers that start before video time 0
1210         *buf_out = NULL;
1211         return HB_WORK_OK;
1212     }
1213
1214     if ( ! pv->context )
1215     {
1216         init_ffmpeg_context( w );
1217         // duration is a scaling factor to go from #bytes in the decoded
1218         // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1219         // total samples to per-channel samples. 'sample_rate' converts
1220         // per-channel samples to seconds per sample and the 90000
1221         // is mpeg ticks per second.
1222         pv->duration = 90000. /
1223                     (double)( pv->context->sample_rate * pv->context->channels );
1224     }
1225     hb_buffer_t *in = *buf_in;
1226
1227     // if the packet has a timestamp use it if we don't have a timestamp yet
1228     // or if there's been a timing discontinuity of more than 100ms.
1229     if ( in->start >= 0 &&
1230          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1231     {
1232         pv->pts_next = in->start;
1233     }
1234     prepare_ffmpeg_buffer( in );
1235     decodeAudio( pv, in->data, in->size );
1236     *buf_out = link_buf_list( pv );
1237
1238     return HB_WORK_OK;
1239 }
1240
1241 hb_work_object_t hb_decavcodecvi =
1242 {
1243     WORK_DECAVCODECVI,
1244     "Video decoder (ffmpeg streams)",
1245     decavcodecviInit,
1246     decavcodecviWork,
1247     decavcodecClose,
1248     decavcodecviInfo,
1249     decavcodecvBSInfo
1250 };
1251
1252 hb_work_object_t hb_decavcodecai =
1253 {
1254     WORK_DECAVCODECAI,
1255     "Audio decoder (ffmpeg streams)",
1256     decavcodecviInit,
1257     decavcodecaiWork,
1258     decavcodecClose,
1259     decavcodecInfo,
1260     decavcodecBSInfo
1261 };