libhb/decavcodec.c

   1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 /* This module is Handbrake's interface to the ffmpeg decoder library
   8    (libavcodec & small parts of libavformat). It contains four Handbrake
   9    "work objects":
  10
  11     decavcodec  connects HB to an ffmpeg audio decoder
  12     decavcodecv connects HB to an ffmpeg video decoder
  13
  14         (Two different routines are needed because the ffmpeg library
  15         has different decoder calling conventions for audio & video.
  16         The audio decoder should have had its name changed to "decavcodeca"
  17         but I got lazy.) These work objects are self-contained & follow all
  18         of HB's conventions for a decoder module. They can be used like
  19         any other HB decoder (deca52, decmpeg2, etc.).
  20
  21     decavcodecai "internal" (incestuous?) version of decavcodec
  22     decavcodecvi "internal" (incestuous?) version of decavcodecv
  23
  24         These routines are functionally equivalent to the routines above but
  25         can only be used by the ffmpeg-based stream reader in libhb/stream.c.
  26         The reason they exist is because the ffmpeg library leaves some of
  27         the information needed by the decoder in the AVStream (the data
  28         structure used by the stream reader) and we need to retrieve it
  29         to successfully decode frames. But in HB the reader and decoder
  30         modules are in completely separate threads and nothing goes between
  31         them but hb_buffers containing frames to be decoded. I.e., there's
  32         no easy way for the ffmpeg stream reader to pass a pointer to its
  33         AVStream over to the ffmpeg video or audio decoder. So the *i work
  34         objects use a private back door to the stream reader to get access
  35         to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
  36         and the codec_param passed to these work objects is the key to this
  37         back door (it's basically an index that allows the correct AVStream
  38         to be retrieved).
  39
  40     The normal & *i objects share a lot of code (the basic frame decoding
  41     and bitstream info code is factored out into subroutines that can be
  42     called by either) but the top level routines of the *i objects
  43     (decavcodecviWork, decavcodecviInfo, etc.) are different because:
  44      1) they *have* to use the AVCodecContext that's contained in the
  45         reader's AVStream rather than just allocating & using their own,
  46      2) the Info routines have access to stuff kept in the AVStream in addition
  47         to stuff kept in the AVCodecContext. This shouldn't be necessary but
  48         crucial information like video frame rate that should be in the
  49         AVCodecContext is either missing or wrong in the version of ffmpeg
  50         we're currently using.
  51
  52     A consequence of the above is that the non-i work objects *can't* use
  53     information from the AVStream because there isn't one - they get their
  54     data from either the dvd reader or the mpeg reader, not the ffmpeg stream
  55     reader. That means that they have to make up for deficiencies in the
  56     AVCodecContext info by using stuff kept in the HB "title" struct. It
  57     also means that ffmpeg codecs that randomly scatter state needed by
  58     the decoder across both the AVCodecContext & the AVStream (e.g., the
  59     VC1 decoder) can't easily be used by the HB mpeg stream reader.
  60  */
  61
  62 #include "hb.h"
  63 #include "hbffmpeg.h"
  64 #include "libavcodec/audioconvert.h"
  65
/* Forward declarations of the audio decoder entry points. They are needed
 * because the hb_decavcodec work object that follows refers to them before
 * their definitions appear. */
static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
static void decavcodecClose( hb_work_object_t * );
static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );
  71
/* Work object for the libavcodec based audio decoder. The initializer is
 * positional, so the order of the entries has to match the member order of
 * hb_work_object_t (declared outside this file). */
hb_work_object_t hb_decavcodec =
{
    WORK_DECAVCODEC,                // presumably the work object id - confirm against hb_work_object_t
    "MPGA decoder (libavcodec)",    // presumably the human readable name - confirm against hb_work_object_t
    decavcodecInit,                 // allocates private state & opens the codec
    decavcodecWork,                 // decodes one input buffer of compressed audio
    decavcodecClose,                // logs statistics & releases private state
    decavcodecInfo,                 // reports parameters from an open codec context
    decavcodecBSInfo                // reports parameters by decoding a sample buffer
};
  82
// Capacity of the timestamp heap and of the video delay queue. The delay
// queue code computes slots with "& (HEAP_SIZE-1)", so this has to stay a
// power of two.
#define HEAP_SIZE 8
// Min-heap of timestamps, manipulated by heap_push() and heap_pop().
typedef struct {
    // there are nheap items on the heap indexed 1..nheap (i.e., top of
    // heap is 1). The 0th slot is unused. It was intended to hold a marker
    // for detecting overwrite errors.
    // NOTE(review): heap_push/heap_pop neither write nor check such a
    // marker - confirm whether any other code does.
    int64_t h[HEAP_SIZE+1];
    int     nheap;              // number of timestamps currently stored
} pts_heap_t;
  91
// Private state shared by the decoder work objects in this file. It is
// allocated zero-filled by the Init routines and released by decavcodecClose.
struct hb_work_private_s
{
    hb_job_t        *job;       // job handed to Init; the video code treats a NULL job as a scan
    AVCodecContext  *context;   // libavcodec decoder context
    AVCodecParserContext *parser; // libavcodec parser used to split the input into frames
    hb_list_t       *list;      // decoded video buffers waiting to be handed downstream
    double          duration;   // frame duration (for video)
    double          pts_next;   // next pts we expect to generate
    int64_t         pts;        // (video) pts passing from parser to decoder
    int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
    int             new_chap;   // output chapter mark pending
    uint32_t        nframes;    // video frames decoded so far; its low bits select the delayq slot
    uint32_t        ndrops;     // reported as "drops" in the log line written at close
    uint32_t        decode_errors; // number of avcodec_decode_video2 calls that returned an error
    int             brokenByMicrosoft; // video stream may contain packed b-frames
    hb_buffer_t*    delayq[HEAP_SIZE]; // ring of decoded frames held back until their timestamp is sorted out
    pts_heap_t      pts_heap;   // timestamps of the frames sitting in delayq, smallest first
    void*           buffer;     // released with av_free() at close
                                // NOTE(review): no other use is visible in this part of the file
    struct SwsContext *sws_context; // if we have to rescale or convert color space
};
 112
 113 static int64_t heap_pop( pts_heap_t *heap )
 114 {
 115     int64_t result;
 116
 117     if ( heap->nheap <= 0 )
 118     {
 119         return -1;
 120     }
 121
 122     // return the top of the heap then put the bottom element on top,
 123     // decrease the heap size by one & rebalence the heap.
 124     result = heap->h[1];
 125
 126     int64_t v = heap->h[heap->nheap--];
 127     int parent = 1;
 128     int child = parent << 1;
 129     while ( child <= heap->nheap )
 130     {
 131         // find the smallest of the two children of parent
 132         if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
 133             ++child;
 134
 135         if (v <= heap->h[child])
 136             // new item is smaller than either child so it's the new parent.
 137             break;
 138
 139         // smallest child is smaller than new item so move it up then
 140         // check its children.
 141         int64_t hp = heap->h[child];
 142         heap->h[parent] = hp;
 143         parent = child;
 144         child = parent << 1;
 145     }
 146     heap->h[parent] = v;
 147     return result;
 148 }
 149
 150 static void heap_push( pts_heap_t *heap, int64_t v )
 151 {
 152     if ( heap->nheap < HEAP_SIZE )
 153     {
 154         ++heap->nheap;
 155     }
 156
 157     // stick the new value on the bottom of the heap then bubble it
 158     // up to its correct spot.
 159         int child = heap->nheap;
 160         while (child > 1) {
 161                 int parent = child >> 1;
 162                 if (heap->h[parent] <= v)
 163                         break;
 164                 // move parent down
 165                 int64_t hp = heap->h[parent];
 166                 heap->h[child] = hp;
 167                 child = parent;
 168         }
 169         heap->h[child] = v;
 170 }
 171
 172
 173 /***********************************************************************
 174  * hb_work_decavcodec_init
 175  ***********************************************************************
 176  *
 177  **********************************************************************/
 178 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 179 {
 180     AVCodec * codec;
 181
 182     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 183     w->private_data = pv;
 184
 185     pv->job   = job;
 186
 187     int codec_id = w->codec_param;
 188     /*XXX*/
 189     if ( codec_id == 0 )
 190         codec_id = CODEC_ID_MP2;
 191
 192     codec = avcodec_find_decoder( codec_id );
 193     pv->parser = av_parser_init( codec_id );
 194
 195     pv->context = avcodec_alloc_context();
 196     hb_avcodec_open( pv->context, codec );
 197
 198     return 0;
 199 }
 200
 201 /***********************************************************************
 202  * Close
 203  ***********************************************************************
 204  *
 205  **********************************************************************/
 206 static void decavcodecClose( hb_work_object_t * w )
 207 {
 208     hb_work_private_t * pv = w->private_data;
 209
 210     if ( pv )
 211     {
 212         if ( pv->job && pv->context && pv->context->codec )
 213         {
 214             hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
 215                     pv->context->codec->name, pv->nframes, pv->decode_errors,
 216                     pv->ndrops );
 217         }
 218         if ( pv->sws_context )
 219         {
 220             sws_freeContext( pv->sws_context );
 221         }
 222         if ( pv->parser )
 223         {
 224             av_parser_close(pv->parser);
 225         }
 226         if ( pv->context && pv->context->codec )
 227         {
 228             hb_avcodec_close( pv->context );
 229         }
 230         if ( pv->list )
 231         {
 232             hb_list_close( &pv->list );
 233         }
 234         if ( pv->buffer )
 235         {
 236             av_free( pv->buffer );
 237             pv->buffer = NULL;
 238         }
 239         free( pv );
 240         w->private_data = NULL;
 241     }
 242 }
 243
 244 /***********************************************************************
 245  * Work
 246  ***********************************************************************
 247  *
 248  **********************************************************************/
 249 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 250                     hb_buffer_t ** buf_out )
 251 {
 252     hb_work_private_t * pv = w->private_data;
 253     hb_buffer_t * in = *buf_in, * buf, * last = NULL;
 254     int   pos, len, out_size, i, uncompressed_len;
 255     short* bufaligned;
 256     uint64_t cur;
 257     unsigned char *parser_output_buffer;
 258     int parser_output_buffer_len;
 259
 260     if ( (*buf_in)->size <= 0 )
 261     {
 262         /* EOF on input stream - send it downstream & say that we're done */
 263         *buf_out = *buf_in;
 264         *buf_in = NULL;
 265         return HB_WORK_DONE;
 266     }
 267
 268     *buf_out = NULL;
 269
 270     if ( in->start < -1 && pv->pts_next <= 0 )
 271     {
 272         // discard buffers that start before video time 0
 273         return HB_WORK_OK;
 274     }
 275
 276     cur = ( in->start < 0 )? pv->pts_next : in->start;
 277
 278     bufaligned = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 279     pos = 0;
 280     while( pos < in->size )
 281     {
 282         len = av_parser_parse2( pv->parser, pv->context,
 283                                 &parser_output_buffer, &parser_output_buffer_len,
 284                                 in->data + pos, in->size - pos, cur, cur, AV_NOPTS_VALUE );
 285         out_size = 0;
 286         uncompressed_len = 0;
 287         if (parser_output_buffer_len)
 288         {
 289             AVPacket avp;
 290             av_init_packet( &avp );
 291             avp.data = parser_output_buffer;
 292             avp.size = parser_output_buffer_len;
 293
 294             out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 295             uncompressed_len = avcodec_decode_audio3( pv->context, bufaligned, &out_size, &avp );
 296         }
 297         if( out_size )
 298         {
 299             short * s16;
 300             float * fl32;
 301
 302             buf = hb_buffer_init( 2 * out_size );
 303
 304             int sample_size_in_bytes = 2;   // Default to 2 bytes
 305             switch (pv->context->sample_fmt)
 306             {
 307               case SAMPLE_FMT_S16:
 308                 sample_size_in_bytes = 2;
 309                 break;
 310               /* We should handle other formats here - but that needs additional format conversion work below */
 311               /* For now we'll just report the error and try to carry on */
 312               default:
 313                 hb_log("decavcodecWork - Unknown Sample Format from avcodec_decode_audio (%d) !", pv->context->sample_fmt);
 314                 break;
 315             }
 316
 317             buf->start = cur;
 318             buf->stop  = cur + 90000 * ( out_size / (sample_size_in_bytes * pv->context->channels) ) /
 319                          pv->context->sample_rate;
 320             cur = buf->stop;
 321
 322             s16  = bufaligned;
 323             fl32 = (float *) buf->data;
 324             for( i = 0; i < out_size / 2; i++ )
 325             {
 326                 fl32[i] = s16[i];
 327             }
 328
 329             if( last )
 330             {
 331                 last = last->next = buf;
 332             }
 333             else
 334             {
 335                 *buf_out = last = buf;
 336             }
 337         }
 338
 339         pos += len;
 340     }
 341
 342     pv->pts_next = cur;
 343
 344     av_free( bufaligned );
 345     return HB_WORK_OK;
 346 }
 347
 348 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
 349 {
 350     hb_work_private_t *pv = w->private_data;
 351
 352     memset( info, 0, sizeof(*info) );
 353
 354     if ( pv && pv->context )
 355     {
 356         AVCodecContext *context = pv->context;
 357         info->bitrate = context->bit_rate;
 358         info->rate = context->time_base.num;
 359         info->rate_base = context->time_base.den;
 360         info->profile = context->profile;
 361         info->level = context->level;
 362         return 1;
 363     }
 364     return 0;
 365 }
 366
// Maps a channel count (the array index) to an HB input channel layout.
// decavcodecBSInfo indexes this table with "channels & 7", so that caller
// only ever reaches entries 0..7.
static const int chan2layout[] = {
    HB_INPUT_CH_LAYOUT_MONO,  // We should allow no audio really.
    HB_INPUT_CH_LAYOUT_MONO,
    HB_INPUT_CH_LAYOUT_STEREO,
    HB_INPUT_CH_LAYOUT_2F1R,
    HB_INPUT_CH_LAYOUT_2F2R,
    HB_INPUT_CH_LAYOUT_3F2R,
    HB_INPUT_CH_LAYOUT_4F2R,
    HB_INPUT_CH_LAYOUT_STEREO,  // 7 channels: reported as stereo
    HB_INPUT_CH_LAYOUT_STEREO,  // 8 channels: reported as stereo
};
 378
 379 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 380                              hb_work_info_t *info )
 381 {
 382     hb_work_private_t *pv = w->private_data;
 383     int ret = 0;
 384
 385     memset( info, 0, sizeof(*info) );
 386
 387     if ( pv && pv->context )
 388     {
 389         return decavcodecInfo( w, info );
 390     }
 391     // XXX
 392     // We should parse the bitstream to find its parameters but for right
 393     // now we just return dummy values if there's a codec that will handle it.
 394     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
 395                                                            CODEC_ID_MP2 );
 396     if ( ! codec )
 397     {
 398         // there's no ffmpeg codec for this audio type - give up
 399         return -1;
 400     }
 401
 402     static char codec_name[64];
 403     info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
 404
 405     AVCodecParserContext *parser = av_parser_init( codec->id );
 406     AVCodecContext *context = avcodec_alloc_context();
 407     hb_avcodec_open( context, codec );
 408     uint8_t *buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 409     int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 410     unsigned char *pbuffer;
 411     int pos = 0, pbuffer_size;
 412
 413     while ( pos < buf->size )
 414     {
 415         int len = av_parser_parse2( parser, context, &pbuffer, &pbuffer_size,
 416                                     buf->data + pos, buf->size - pos,
 417                                     buf->start, buf->start, AV_NOPTS_VALUE );
 418         pos += len;
 419         if ( pbuffer_size > 0 )
 420         {
 421             AVPacket avp;
 422             av_init_packet( &avp );
 423             avp.data = pbuffer;
 424             avp.size = pbuffer_size;
 425
 426             len = avcodec_decode_audio3( context, (int16_t*)buffer, &out_size, &avp );
 427             if ( len > 0 && context->sample_rate > 0 )
 428             {
 429                 info->bitrate = context->bit_rate;
 430                 info->rate = context->sample_rate;
 431                 info->rate_base = 1;
 432                 info->channel_layout = chan2layout[context->channels & 7];
 433                 ret = 1;
 434                 break;
 435             }
 436         }
 437     }
 438     av_free( buffer );
 439     av_parser_close( parser );
 440     hb_avcodec_close( context );
 441     return ret;
 442 }
 443
 444 /* -------------------------------------------------------------
 445  * General purpose video decoder using libavcodec
 446  */
 447
 448 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
 449                             int h )
 450 {
 451     if ( dstride == sstride )
 452     {
 453         memcpy( dst, src, dstride * h );
 454         return dst + dstride * h;
 455     }
 456     int lbytes = dstride <= sstride? dstride : sstride;
 457     while ( --h >= 0 )
 458     {
 459         memcpy( dst, src, lbytes );
 460         src += sstride;
 461         dst += dstride;
 462     }
 463     return dst;
 464 }
 465
 466 // copy one video frame into an HB buf. If the frame isn't in our color space
 467 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
 468 // Otherwise just copy the bits.
 469 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 470 {
 471     AVCodecContext *context = pv->context;
 472     int w, h;
 473     if ( ! pv->job )
 474     {
 475         // if the dimensions are odd, drop the lsb since h264 requires that
 476         // both width and height be even.
 477         w = ( context->width >> 1 ) << 1;
 478         h = ( context->height >> 1 ) << 1;
 479     }
 480     else
 481     {
 482         w =  pv->job->title->width;
 483         h =  pv->job->title->height;
 484     }
 485     hb_buffer_t *buf = hb_video_buffer_init( w, h );
 486     uint8_t *dst = buf->data;
 487
 488     if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
 489          h != context->height )
 490     {
 491         // have to convert to our internal color space and/or rescale
 492         AVPicture dstpic;
 493         avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 494
 495         if ( ! pv->sws_context )
 496         {
 497             pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
 498                                               w, h, PIX_FMT_YUV420P,
 499                                               SWS_LANCZOS|SWS_ACCURATE_RND,
 500                                               NULL, NULL, NULL );
 501         }
 502         sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
 503                    dstpic.data, dstpic.linesize );
 504     }
 505     else
 506     {
 507         dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
 508         w = (w + 1) >> 1; h = (h + 1) >> 1;
 509         dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
 510         dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
 511     }
 512     return buf;
 513 }
 514
 515 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
 516 {
 517     hb_work_private_t *pv = context->opaque;
 518     frame->pts = pv->pts;
 519     pv->pts = -1;
 520     return avcodec_default_get_buffer( context, frame );
 521 }
 522
 523 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 524 {
 525     hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
 526     if ( c && c->title )
 527     {
 528         hb_log( "%s: \"%s\" (%d) at frame %u time %"PRId64,
 529                 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
 530     }
 531     else
 532     {
 533         hb_log( "%s: Chapter %d at frame %u time %"PRId64,
 534                 pv->context->codec->name, chap_num, pv->nframes, pts );
 535     }
 536 }
 537
 538 static void flushDelayQueue( hb_work_private_t *pv )
 539 {
 540     hb_buffer_t *buf;
 541     int slot = pv->nframes & (HEAP_SIZE-1);
 542
 543     // flush all the video packets left on our timestamp-reordering delay q
 544     while ( ( buf = pv->delayq[slot] ) != NULL )
 545     {
 546         buf->start = heap_pop( &pv->pts_heap );
 547         hb_list_add( pv->list, buf );
 548         pv->delayq[slot] = NULL;
 549         slot = ( slot + 1 ) & (HEAP_SIZE-1);
 550     }
 551 }
 552
 553 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
 554 {
 555     int got_picture, oldlevel = 0;
 556     AVFrame frame;
 557     AVPacket avp;
 558
 559     if ( global_verbosity_level <= 1 )
 560     {
 561         oldlevel = av_log_get_level();
 562         av_log_set_level( AV_LOG_QUIET );
 563     }
 564
 565     av_init_packet( &avp );
 566     avp.data = data;
 567     avp.size = size;
 568     if ( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 )
 569     {
 570         ++pv->decode_errors;
 571     }
 572     if ( global_verbosity_level <= 1 )
 573     {
 574         av_log_set_level( oldlevel );
 575     }
 576     if( got_picture )
 577     {
 578         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
 579         // packet had a pts we handed it to av_parser_parse (if the packet had
 580         // no pts we set it to -1 but before the parse we can't distinguish between
 581         // the start of a video frame with no pts & an intermediate packet of
 582         // some frame which never has a pts). we hope that when parse returns
 583         // the frame to us the pts we originally handed it will be in parser->pts.
 584         // we put this pts into pv->pts so that when a avcodec_decode_video
 585         // finally gets around to allocating an AVFrame to hold the decoded
 586         // frame we can stuff that pts into the frame. if all of these relays
 587         // worked at this point frame.pts should hold the frame's pts from the
 588         // original data stream or -1 if it didn't have one. in the latter case
 589         // we generate the next pts in sequence for it.
 590         double frame_dur = pv->duration;
 591         if ( frame_dur <= 0 )
 592         {
 593             frame_dur = 90000. * (double)pv->context->time_base.num /
 594                         (double)pv->context->time_base.den;
 595             pv->duration = frame_dur;
 596         }
 597         if ( frame.repeat_pict )
 598         {
 599             frame_dur += frame.repeat_pict * frame_dur * 0.5;
 600         }
 601         // XXX Unlike every other video decoder, the Raw decoder doesn't
 602         //     use the standard buffer allocation routines so we never
 603         //     get to put a PTS in the frame. Do it now.
 604         if ( pv->context->codec_id == CODEC_ID_RAWVIDEO )
 605         {
 606             frame.pts = pv->pts;
 607             pv->pts = -1;
 608         }
 609         // If there was no pts for this frame, assume constant frame rate
 610         // video & estimate the next frame time from the last & duration.
 611         double pts = frame.pts;
 612         if ( pts < 0 )
 613         {
 614             pts = pv->pts_next;
 615         }
 616         pv->pts_next = pts + frame_dur;
 617
 618         hb_buffer_t *buf;
 619
 620         // if we're doing a scan or this content couldn't have been broken
 621         // by Microsoft we don't worry about timestamp reordering
 622         if ( ! pv->job || ! pv->brokenByMicrosoft )
 623         {
 624             buf = copy_frame( pv, &frame );
 625             buf->start = pts;
 626             hb_list_add( pv->list, buf );
 627             ++pv->nframes;
 628             return got_picture;
 629         }
 630
 631         // XXX This following probably addresses a libavcodec bug but I don't
 632         //     see an easy fix so we workaround it here.
 633         //
 634         // The M$ 'packed B-frames' atrocity results in decoded frames with
 635         // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
 636         // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
 637         // The frames are actually delivered in the right order but with
 638         // the wrong timestamp. To get the correct timestamp attached to
 639         // each frame we have a delay queue (longer than the max number of
 640         // b-frames) & a sorting heap for the timestamps. As each frame
 641         // comes out of the decoder the oldest frame in the queue is removed
 642         // and associated with the smallest timestamp. Then the new frame is
 643         // added to the queue & its timestamp is pushed on the heap.
 644         // This does nothing if the timestamps are correct (i.e., the video
 645         // uses a codec that Micro$oft hasn't broken yet) but the frames
 646         // get timestamped correctly even when M$ has munged them.
 647
 648         // remove the oldest picture from the frame queue (if any) &
 649         // give it the smallest timestamp from our heap. The queue size
 650         // is a power of two so we get the slot of the oldest by masking
 651         // the frame count & this will become the slot of the newest
 652         // once we've removed & processed the oldest.
 653         int slot = pv->nframes & (HEAP_SIZE-1);
 654         if ( ( buf = pv->delayq[slot] ) != NULL )
 655         {
 656             buf->start = heap_pop( &pv->pts_heap );
 657
 658             if ( pv->new_chap && buf->start >= pv->chap_time )
 659             {
 660                 buf->new_chap = pv->new_chap;
 661                 pv->new_chap = 0;
 662                 pv->chap_time = 0;
 663                 log_chapter( pv, buf->new_chap, buf->start );
 664             }
 665             else if ( pv->nframes == 0 )
 666             {
 667                 log_chapter( pv, pv->job->chapter_start, buf->start );
 668             }
 669             hb_list_add( pv->list, buf );
 670         }
 671
 672         // add the new frame to the delayq & push its timestamp on the heap
 673         pv->delayq[slot] = copy_frame( pv, &frame );
 674         heap_push( &pv->pts_heap, pts );
 675
 676         ++pv->nframes;
 677     }
 678
 679     return got_picture;
 680 }
 681
 682 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
 683                          int64_t pts, int64_t dts )
 684 {
 685     /*
 686      * The following loop is a do..while because we need to handle both
 687      * data & the flush at the end (signaled by size=0). At the end there's
 688      * generally a frame in the parser & one or more frames in the decoder
 689      * (depending on the bframes setting).
 690      */
 691     int pos = 0;
 692     do {
 693         uint8_t *pout;
 694         int pout_len;
 695         int len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len,
 696                                     data + pos, size - pos, pts, dts, AV_NOPTS_VALUE );
 697         pos += len;
 698
 699         if ( pout_len > 0 )
 700         {
 701             pv->pts = pv->parser->pts;
 702             decodeFrame( pv, pout, pout_len );
 703         }
 704     } while ( pos < size );
 705
 706     /* the stuff above flushed the parser, now flush the decoder */
 707     if ( size <= 0 )
 708     {
 709         while ( decodeFrame( pv, NULL, 0 ) )
 710         {
 711         }
 712         flushDelayQueue( pv );
 713     }
 714 }
 715
 716 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 717 {
 718     hb_buffer_t *head = hb_list_item( pv->list, 0 );
 719
 720     if ( head )
 721     {
 722         hb_list_rem( pv->list, head );
 723
 724         hb_buffer_t *last = head, *buf;
 725
 726         while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
 727         {
 728             hb_list_rem( pv->list, buf );
 729             last->next = buf;
 730             last = buf;
 731         }
 732     }
 733     return head;
 734 }
 735
 736
 737 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
 738 {
 739
 740     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
 741     w->private_data = pv;
 742     pv->job   = job;
 743     pv->list = hb_list_init();
 744
 745     int codec_id = w->codec_param;
 746     pv->parser = av_parser_init( codec_id );
 747     pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
 748
 749     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
 750     pv->context->opaque = pv;
 751     pv->context->get_buffer = get_frame_buf;
 752
 753     return 0;
 754 }
 755
 756 static int next_hdr( hb_buffer_t *in, int offset )
 757 {
 758     uint8_t *dat = in->data;
 759     uint16_t last2 = 0xffff;
 760     for ( ; in->size - offset > 1; ++offset )
 761     {
 762         if ( last2 == 0 && dat[offset] == 0x01 )
 763             // found an mpeg start code
 764             return offset - 2;
 765
 766         last2 = ( last2 << 8 ) | dat[offset];
 767     }
 768
 769     return -1;
 770 }
 771
 772 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
 773 {
 774     if ( in->size - offset < 4 )
 775         // not enough room for an mpeg start code
 776         return -1;
 777
 778     for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
 779     {
 780         if ( in->data[offset+3] == hdr_type )
 781             // found it
 782             break;
 783     }
 784     return offset;
 785 }
 786
 787 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
 788 {
 789     hb_work_private_t *pv = w->private_data;
 790
 791     // we can't call the avstream funcs but the read_header func in the
 792     // AVInputFormat may set up some state in the AVContext. In particular
 793     // vc1t_read_header allocates 'extradata' to deal with header issues
 794     // related to Microsoft's bizarre engineering notions. We alloc a chunk
 795     // of space to make vc1 work then associate the codec with the context.
 796     if ( w->codec_param != CODEC_ID_VC1 )
 797     {
 798         // we haven't been inflicted with M$ - allocate a little space as
 799         // a marker and return success.
 800         pv->context->extradata_size = 16;
 801         pv->context->extradata = av_malloc(pv->context->extradata_size);
 802         return 0;
 803     }
 804
 805     // find the start and and of the sequence header
 806     int shdr, shdr_end;
 807     if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
 808     {
 809         // didn't find start of seq hdr
 810         return 1;
 811     }
 812     if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
 813     {
 814         shdr_end = in->size;
 815     }
 816     shdr_end -= shdr;
 817
 818     // find the start and and of the entry point header
 819     int ehdr, ehdr_end;
 820     if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
 821     {
 822         // didn't find start of entry point hdr
 823         return 1;
 824     }
 825     if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
 826     {
 827         ehdr_end = in->size;
 828     }
 829     ehdr_end -= ehdr;
 830
 831     // found both headers - allocate an extradata big enough to hold both
 832     // then copy them into it.
 833     pv->context->extradata_size = shdr_end + ehdr_end;
 834     pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
 835     memcpy( pv->context->extradata, in->data + shdr, shdr_end );
 836     memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
 837     memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
 838     return 0;
 839 }
 840
 841 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 842                             hb_buffer_t ** buf_out )
 843 {
 844     hb_work_private_t *pv = w->private_data;
 845     hb_buffer_t *in = *buf_in;
 846     int64_t pts = AV_NOPTS_VALUE;
 847     int64_t dts = pts;
 848
 849     *buf_in = NULL;
 850
 851     /* if we got an empty buffer signaling end-of-stream send it downstream */
 852     if ( in->size == 0 )
 853     {
 854         decodeVideo( pv, in->data, in->size, pts, dts );
 855         hb_list_add( pv->list, in );
 856         *buf_out = link_buf_list( pv );
 857         return HB_WORK_DONE;
 858     }
 859
 860     // if this is the first frame open the codec (we have to wait for the
 861     // first frame because of M$ VC1 braindamage).
 862     if ( pv->context->extradata_size == 0 )
 863     {
 864         if ( setup_extradata( w, in ) )
 865         {
 866             // we didn't find the headers needed to set up extradata.
 867             // the codec will abort if we open it so just free the buf
 868             // and hope we eventually get the info we need.
 869             hb_buffer_close( &in );
 870             return HB_WORK_OK;
 871         }
 872         AVCodec *codec = avcodec_find_decoder( w->codec_param );
 873         // There's a mis-feature in ffmpeg that causes the context to be
 874         // incorrectly initialized the 1st time avcodec_open is called.
 875         // If you close it and open a 2nd time, it finishes the job.
 876         hb_avcodec_open( pv->context, codec );
 877         hb_avcodec_close( pv->context );
 878         hb_avcodec_open( pv->context, codec );
 879     }
 880
 881     if( in->start >= 0 )
 882     {
 883         pts = in->start;
 884         dts = in->renderOffset;
 885     }
 886     if ( in->new_chap )
 887     {
 888         pv->new_chap = in->new_chap;
 889         pv->chap_time = pts >= 0? pts : pv->pts_next;
 890     }
 891     decodeVideo( pv, in->data, in->size, pts, dts );
 892     hb_buffer_close( &in );
 893     *buf_out = link_buf_list( pv );
 894     return HB_WORK_OK;
 895 }
 896
 897 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
 898 {
 899     hb_work_private_t *pv = w->private_data;
 900
 901     memset( info, 0, sizeof(*info) );
 902
 903     if ( pv && pv->context )
 904     {
 905         AVCodecContext *context = pv->context;
 906         info->bitrate = context->bit_rate;
 907         info->width = context->width;
 908         info->height = context->height;
 909
 910         /* ffmpeg gives the frame rate in frames per second while HB wants
 911          * it in units of the 27MHz MPEG clock. */
 912         info->rate = 27000000;
 913         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
 914                           context->time_base.den;
 915         if ( context->ticks_per_frame > 1 )
 916         {
 917             // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
 918             // field rate rather than frame rate so convert back to frames.
 919             info->rate_base *= context->ticks_per_frame;
 920         }
 921
 922         /* Sometimes there's no pixel aspect set in the source. In that case,
 923            assume a 1:1 PAR. Otherwise, preserve the source PAR.             */
 924         info->pixel_aspect_width = context->sample_aspect_ratio.num ?
 925                                         context->sample_aspect_ratio.num : 1;
 926         info->pixel_aspect_height = context->sample_aspect_ratio.den ?
 927                                         context->sample_aspect_ratio.den : 1;
 928
 929         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
 930          * Display Aspect Ratio so we convert by scaling by the Storage
 931          * Aspect Ratio (w/h). We do the calc in floating point to get the
 932          * rounding right. */
 933         info->aspect = (double)info->pixel_aspect_width *
 934                        (double)context->width /
 935                        (double)info->pixel_aspect_height /
 936                        (double)context->height;
 937
 938         info->profile = context->profile;
 939         info->level = context->level;
 940         info->name = context->codec->name;
 941         return 1;
 942     }
 943     return 0;
 944 }
 945
 946 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 947                              hb_work_info_t *info )
 948 {
 949     return 0;
 950 }
 951
 952 hb_work_object_t hb_decavcodecv =
 953 {
 954     WORK_DECAVCODECV,
 955     "Video decoder (libavcodec)",
 956     decavcodecvInit,
 957     decavcodecvWork,
 958     decavcodecClose,
 959     decavcodecvInfo,
 960     decavcodecvBSInfo
 961 };
 962
 963
 964 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
 965 // includes a parser and passes information from the parser to the decoder
 966 // via a codec context kept in the AVStream of the reader's AVFormatContext.
 967 // We *have* to use that codec context to decode the stream or we'll get
 968 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
 969 // codec context in our codec_param.
 970
 971 // this routine gets the appropriate context pointer from the ffmpeg
 972 // stream reader. it can't be called until we get the first buffer because
 973 // we can't guarantee that reader will be called before the our init
 974 // routine and if our init is called first we'll get a pointer to the
 975 // old scan stream (which has already been closed).
 976 static void init_ffmpeg_context( hb_work_object_t *w )
 977 {
 978     hb_work_private_t *pv = w->private_data;
 979     pv->context = hb_ffmpeg_context( w->codec_param );
 980
 981     // during scan the decoder gets closed & reopened which will
 982     // close the codec so reopen it if it's not there
 983     if ( ! pv->context->codec )
 984     {
 985         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
 986         hb_avcodec_open( pv->context, codec );
 987     }
 988     // set up our best guess at the frame duration.
 989     // the frame rate in the codec is usually bogus but it's sometimes
 990     // ok in the stream.
 991     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 992
 993     if ( st->nb_frames && st->duration )
 994     {
 995         // compute the average frame duration from the total number
 996         // of frames & the total duration.
 997         pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
 998                        ( (double)st->nb_frames * (double)st->time_base.den );
 999     }
1000     else
1001     {
1002         // XXX We don't have a frame count or duration so try to use the
1003         // far less reliable time base info in the stream.
1004         // Because the time bases are so screwed up, we only take values
1005         // in the range 8fps - 64fps.
1006         AVRational tb;
1007         if ( st->time_base.num * 64 > st->time_base.den &&
1008              st->time_base.den > st->time_base.num * 8 )
1009         {
1010             tb = st->time_base;
1011         }
1012         else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
1013                   st->r_frame_rate.num > st->r_frame_rate.den * 8 )
1014         {
1015             tb.num = st->r_frame_rate.den;
1016             tb.den = st->r_frame_rate.num;
1017         }
1018         else
1019         {
1020             tb.num = 1001;  /*XXX*/
1021             tb.den = 24000; /*XXX*/
1022         }
1023         pv->duration =  (double)tb.num / (double)tb.den;
1024     }
1025     pv->duration *= 90000.;
1026
1027     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
1028     pv->context->opaque = pv;
1029     pv->context->get_buffer = get_frame_buf;
1030
1031     // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1032     // b-frames abortion that messes up frame ordering and timestamps.
1033     // XXX ffmpeg knows which streams are broken but doesn't expose the
1034     //     info externally. We should patch ffmpeg to add a flag to the
1035     //     codec context for this but until then we mark all ffmpeg streams
1036     //     as suspicious.
1037     pv->brokenByMicrosoft = 1;
1038 }
1039
1040 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1041 {
1042     // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1043     // will seg fault in odd, data dependent ways if it's not there. (my guess
1044     // is this is a case of a local performance optimization creating a global
1045     // performance degradation since all the time wasted by extraneous data
1046     // copies & memory zeroing has to be huge compared to the minor reduction
1047     // in inner-loop instructions this affords - modern cpus bottleneck on
1048     // memory bandwidth not instruction bandwidth).
1049     if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1050     {
1051         // have to realloc to add the padding
1052         hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
1053     }
1054     memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
1055 }
1056
1057 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
1058 {
1059
1060     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1061     w->private_data = pv;
1062     pv->job   = job;
1063     pv->list = hb_list_init();
1064     pv->pts_next = -1;
1065     pv->pts = -1;
1066     return 0;
1067 }
1068
1069 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1070                              hb_buffer_t ** buf_out )
1071 {
1072     hb_work_private_t *pv = w->private_data;
1073     if ( ! pv->context )
1074     {
1075         init_ffmpeg_context( w );
1076     }
1077     hb_buffer_t *in = *buf_in;
1078     *buf_in = NULL;
1079
1080     /* if we got an empty buffer signaling end-of-stream send it downstream */
1081     if ( in->size == 0 )
1082     {
1083         /* flush any frames left in the decoder */
1084         while ( decodeFrame( pv, NULL, 0 ) )
1085         {
1086         }
1087         flushDelayQueue( pv );
1088         hb_list_add( pv->list, in );
1089         *buf_out = link_buf_list( pv );
1090         return HB_WORK_DONE;
1091     }
1092
1093     int64_t pts = in->start;
1094     if( pts >= 0 )
1095     {
1096         // use the first timestamp as our 'next expected' pts
1097         if ( pv->pts_next < 0 )
1098         {
1099             pv->pts_next = pts;
1100         }
1101         pv->pts = pts;
1102     }
1103
1104     if ( in->new_chap )
1105     {
1106         pv->new_chap = in->new_chap;
1107         pv->chap_time = pts >= 0? pts : pv->pts_next;
1108     }
1109     prepare_ffmpeg_buffer( in );
1110     decodeFrame( pv, in->data, in->size );
1111     hb_buffer_close( &in );
1112     *buf_out = link_buf_list( pv );
1113     return HB_WORK_OK;
1114 }
1115
1116 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1117 {
1118     if ( decavcodecvInfo( w, info ) )
1119     {
1120         hb_work_private_t *pv = w->private_data;
1121         if ( ! pv->context )
1122         {
1123             init_ffmpeg_context( w );
1124         }
1125         // we have the frame duration in units of the 90KHz pts clock but
1126         // need it in units of the 27MHz MPEG clock. */
1127         info->rate = 27000000;
1128         info->rate_base = pv->duration * 300.;
1129         return 1;
1130     }
1131     return 0;
1132 }
1133
1134 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
1135 {
1136     AVCodecContext *context = pv->context;
1137     int pos = 0;
1138
1139     while ( pos < size )
1140     {
1141         int16_t *buffer = pv->buffer;
1142         if ( buffer == NULL )
1143         {
1144             pv->buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1145             buffer = pv->buffer;
1146         }
1147
1148         AVPacket avp;
1149         av_init_packet( &avp );
1150         avp.data = data + pos;
1151         avp.size = size - pos;
1152
1153         int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1154         int len = avcodec_decode_audio3( context, buffer, &out_size, &avp );
1155         if ( len <= 0 )
1156         {
1157             return;
1158         }
1159         pos += len;
1160         if( out_size > 0 )
1161         {
1162             // We require signed 16-bit ints for the output format. If
1163             // we got something different convert it.
1164             if ( context->sample_fmt != SAMPLE_FMT_S16 )
1165             {
1166                 // Note: av_audio_convert seems to be a work-in-progress but
1167                 //       looks like it will eventually handle general audio
1168                 //       mixdowns which would allow us much more flexibility
1169                 //       in handling multichannel audio in HB. If we were doing
1170                 //       anything more complicated than a one-for-one format
1171                 //       conversion we'd probably want to cache the converter
1172                 //       context in the pv.
1173                 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1174                 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1175                                                               context->sample_fmt, 1,
1176                                                               NULL, 0 );
1177                 // get output buffer size (in 2-byte samples) then malloc a buffer
1178                 out_size = ( out_size * 2 ) / isamp;
1179                 buffer = av_malloc( out_size );
1180
1181                 // we're doing straight sample format conversion which behaves as if
1182                 // there were only one channel.
1183                 const void * const ibuf[6] = { pv->buffer };
1184                 void * const obuf[6] = { buffer };
1185                 const int istride[6] = { isamp };
1186                 const int ostride[6] = { 2 };
1187
1188                 av_audio_convert( ctx, obuf, ostride, ibuf, istride, out_size >> 1 );
1189                 av_audio_convert_free( ctx );
1190             }
1191             hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
1192
1193             // convert from bytes to total samples
1194             out_size >>= 1;
1195
1196             double pts = pv->pts_next;
1197             buf->start = pts;
1198             pts += out_size * pv->duration;
1199             buf->stop  = pts;
1200             pv->pts_next = pts;
1201
1202             float *fl32 = (float *)buf->data;
1203             int i;
1204             for( i = 0; i < out_size; ++i )
1205             {
1206                 fl32[i] = buffer[i];
1207             }
1208             hb_list_add( pv->list, buf );
1209
1210             // if we allocated a buffer for sample format conversion, free it
1211             if ( buffer != pv->buffer )
1212             {
1213                 av_free( buffer );
1214             }
1215         }
1216     }
1217 }
1218
1219 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1220                     hb_buffer_t **buf_out )
1221 {
1222     if ( (*buf_in)->size <= 0 )
1223     {
1224         /* EOF on input stream - send it downstream & say that we're done */
1225         *buf_out = *buf_in;
1226         *buf_in = NULL;
1227         return HB_WORK_DONE;
1228     }
1229
1230     hb_work_private_t *pv = w->private_data;
1231
1232     if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1233     {
1234         // discard buffers that start before video time 0
1235         *buf_out = NULL;
1236         return HB_WORK_OK;
1237     }
1238
1239     if ( ! pv->context )
1240     {
1241         init_ffmpeg_context( w );
1242         // duration is a scaling factor to go from #bytes in the decoded
1243         // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1244         // total samples to per-channel samples. 'sample_rate' converts
1245         // per-channel samples to seconds per sample and the 90000
1246         // is mpeg ticks per second.
1247         pv->duration = 90000. /
1248                     (double)( pv->context->sample_rate * pv->context->channels );
1249     }
1250     hb_buffer_t *in = *buf_in;
1251
1252     // if the packet has a timestamp use it if we don't have a timestamp yet
1253     // or if there's been a timing discontinuity of more than 100ms.
1254     if ( in->start >= 0 &&
1255          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1256     {
1257         pv->pts_next = in->start;
1258     }
1259     prepare_ffmpeg_buffer( in );
1260     decodeAudio( pv, in->data, in->size );
1261     *buf_out = link_buf_list( pv );
1262
1263     return HB_WORK_OK;
1264 }
1265
1266 hb_work_object_t hb_decavcodecvi =
1267 {
1268     WORK_DECAVCODECVI,
1269     "Video decoder (ffmpeg streams)",
1270     decavcodecviInit,
1271     decavcodecviWork,
1272     decavcodecClose,
1273     decavcodecviInfo,
1274     decavcodecvBSInfo
1275 };
1276
1277 hb_work_object_t hb_decavcodecai =
1278 {
1279     WORK_DECAVCODECAI,
1280     "Audio decoder (ffmpeg streams)",
1281     decavcodecviInit,
1282     decavcodecaiWork,
1283     decavcodecClose,
1284     decavcodecInfo,
1285     decavcodecBSInfo
1286 };