libhb/decavcodec.c

   1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 /* This module is Handbrake's interface to the ffmpeg decoder library
   8    (libavcodec & small parts of libavformat). It contains four Handbrake
   9    "work objects":
  10
  11     decavcodec  connects HB to an ffmpeg audio decoder
  12     decavcodecv connects HB to an ffmpeg video decoder
  13
  14         (Two different routines are needed because the ffmpeg library
  15         has different decoder calling conventions for audio & video.
  16         The audio decoder should have had its name changed to "decavcodeca"
  17         but I got lazy.) These work objects are self-contained & follow all
  18         of HB's conventions for a decoder module. They can be used like
  19         any other HB decoder (deca52, decmpeg2, etc.).
  20
  21     decavcodecai "internal" (incestuous?) version of decavcodec
  22     decavcodecvi "internal" (incestuous?) version of decavcodecv
  23
  24         These routines are functionally equivalent to the routines above but
  25         can only be used by the ffmpeg-based stream reader in libhb/stream.c.
  26         The reason they exist is because the ffmpeg library leaves some of
  27         the information needed by the decoder in the AVStream (the data
  28         structure used by the stream reader) and we need to retrieve it
  29         to successfully decode frames. But in HB the reader and decoder
  30         modules are in completely separate threads and nothing goes between
  31         them but hb_buffers containing frames to be decoded. I.e., there's
  32         no easy way for the ffmpeg stream reader to pass a pointer to its
  33         AVStream over to the ffmpeg video or audio decoder. So the *i work
  34         objects use a private back door to the stream reader to get access
  35         to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
  36         and the codec_param passed to these work objects is the key to this
  37         back door (it's basically an index that allows the correct AVStream
  38         to be retrieved).
  39
  40     The normal & *i objects share a lot of code (the basic frame decoding
  41     and bitstream info code is factored out into subroutines that can be
  42     called by either) but the top level routines of the *i objects
  43     (decavcodecviWork, decavcodecviInfo, etc.) are different because:
  44      1) they *have* to use the AVCodecContext that's contained in the
  45         reader's AVStream rather than just allocating & using their own,
  46      2) the Info routines have access to stuff kept in the AVStream in addition
  47         to stuff kept in the AVCodecContext. This shouldn't be necessary but
  48         crucial information like video frame rate that should be in the
  49         AVCodecContext is either missing or wrong in the version of ffmpeg
  50         we're currently using.
  51
  52     A consequence of the above is that the non-i work objects *can't* use
  53     information from the AVStream because there isn't one - they get their
  54     data from either the dvd reader or the mpeg reader, not the ffmpeg stream
  55     reader. That means that they have to make up for deficiencies in the
  56     AVCodecContext info by using stuff kept in the HB "title" struct. It
  57     also means that ffmpeg codecs that randomly scatter state needed by
  58     the decoder across both the AVCodecContext & the AVStream (e.g., the
  59     VC1 decoder) can't easily be used by the HB mpeg stream reader.
  60  */
  61
  62 #include "hb.h"
  63 #include "hbffmpeg.h"
  64 #include "libavcodec/audioconvert.h"
  65
  66 static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
  67 static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  68 static void decavcodecClose( hb_work_object_t * );
  69 static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
  70 static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );
  71
  72 hb_work_object_t hb_decavcodec =
  73 {
  74     WORK_DECAVCODEC,
  75     "MPGA decoder (libavcodec)",
  76     decavcodecInit,
  77     decavcodecWork,
  78     decavcodecClose,
  79     decavcodecInfo,
  80     decavcodecBSInfo
  81 };
  82
  83 #define HEAP_SIZE 8
  84 typedef struct {
  85     // there are nheap items on the heap indexed 1..nheap (i.e., top of
  86     // heap is 1). The 0th slot is unused - a marker is put there to check
  87     // for overwrite errs.
  88     int64_t h[HEAP_SIZE+1];
  89     int     nheap;
  90 } pts_heap_t;
  91
  92 struct hb_work_private_s
  93 {
  94     hb_job_t        *job;
  95     AVCodecContext  *context;
  96     AVCodecParserContext *parser;
  97     hb_list_t       *list;
  98     double          duration;   // frame duration (for video)
  99     double          pts_next;   // next pts we expect to generate
 100     int64_t         pts;        // (video) pts passing from parser to decoder
 101     int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
 102     int             new_chap;   // output chapter mark pending
 103     uint32_t        nframes;
 104     uint32_t        ndrops;
 105     uint32_t        decode_errors;
 106     int             brokenByMicrosoft; // video stream may contain packed b-frames
 107     hb_buffer_t*    delayq[HEAP_SIZE];
 108     pts_heap_t      pts_heap;
 109     void*           buffer;
 110     struct SwsContext *sws_context; // if we have to rescale or convert color space
 111 };
 112
 113 static int64_t heap_pop( pts_heap_t *heap )
 114 {
 115     int64_t result;
 116
 117     if ( heap->nheap <= 0 )
 118     {
 119         return -1;
 120     }
 121
 122     // return the top of the heap then put the bottom element on top,
 123     // decrease the heap size by one & rebalence the heap.
 124     result = heap->h[1];
 125
 126     int64_t v = heap->h[heap->nheap--];
 127     int parent = 1;
 128     int child = parent << 1;
 129     while ( child <= heap->nheap )
 130     {
 131         // find the smallest of the two children of parent
 132         if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
 133             ++child;
 134
 135         if (v <= heap->h[child])
 136             // new item is smaller than either child so it's the new parent.
 137             break;
 138
 139         // smallest child is smaller than new item so move it up then
 140         // check its children.
 141         int64_t hp = heap->h[child];
 142         heap->h[parent] = hp;
 143         parent = child;
 144         child = parent << 1;
 145     }
 146     heap->h[parent] = v;
 147     return result;
 148 }
 149
 150 static void heap_push( pts_heap_t *heap, int64_t v )
 151 {
 152     if ( heap->nheap < HEAP_SIZE )
 153     {
 154         ++heap->nheap;
 155     }
 156
 157     // stick the new value on the bottom of the heap then bubble it
 158     // up to its correct spot.
 159         int child = heap->nheap;
 160         while (child > 1) {
 161                 int parent = child >> 1;
 162                 if (heap->h[parent] <= v)
 163                         break;
 164                 // move parent down
 165                 int64_t hp = heap->h[parent];
 166                 heap->h[child] = hp;
 167                 child = parent;
 168         }
 169         heap->h[child] = v;
 170 }
 171
 172
 173 /***********************************************************************
 174  * hb_work_decavcodec_init
 175  ***********************************************************************
 176  *
 177  **********************************************************************/
 178 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 179 {
 180     AVCodec * codec;
 181
 182     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 183     w->private_data = pv;
 184
 185     pv->job   = job;
 186
 187     int codec_id = w->codec_param;
 188     /*XXX*/
 189     if ( codec_id == 0 )
 190         codec_id = CODEC_ID_MP2;
 191
 192     codec = avcodec_find_decoder( codec_id );
 193     pv->parser = av_parser_init( codec_id );
 194
 195     pv->context = avcodec_alloc_context();
 196     hb_avcodec_open( pv->context, codec );
 197
 198     return 0;
 199 }
 200
 201 /***********************************************************************
 202  * Close
 203  ***********************************************************************
 204  *
 205  **********************************************************************/
 206 static void decavcodecClose( hb_work_object_t * w )
 207 {
 208     hb_work_private_t * pv = w->private_data;
 209
 210     if ( pv )
 211     {
 212         if ( pv->job && pv->context && pv->context->codec )
 213         {
 214             hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
 215                     pv->context->codec->name, pv->nframes, pv->decode_errors,
 216                     pv->ndrops );
 217         }
 218         if ( pv->sws_context )
 219         {
 220             sws_freeContext( pv->sws_context );
 221         }
 222         if ( pv->parser )
 223         {
 224             av_parser_close(pv->parser);
 225         }
 226         if ( pv->context && pv->context->codec )
 227         {
 228             hb_avcodec_close( pv->context );
 229         }
 230         if ( pv->list )
 231         {
 232             hb_list_close( &pv->list );
 233         }
 234         if ( pv->buffer )
 235         {
 236             free( pv->buffer );
 237             pv->buffer = NULL;
 238         }
 239         free( pv );
 240         w->private_data = NULL;
 241     }
 242 }
 243
 244 /***********************************************************************
 245  * Work
 246  ***********************************************************************
 247  *
 248  **********************************************************************/
 249 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 250                     hb_buffer_t ** buf_out )
 251 {
 252     hb_work_private_t * pv = w->private_data;
 253     hb_buffer_t * in = *buf_in, * buf, * last = NULL;
 254     int   pos, len, out_size, i, uncompressed_len;
 255     short* bufaligned;
 256     uint64_t cur;
 257     unsigned char *parser_output_buffer;
 258     int parser_output_buffer_len;
 259
 260     if ( (*buf_in)->size <= 0 )
 261     {
 262         /* EOF on input stream - send it downstream & say that we're done */
 263         *buf_out = *buf_in;
 264         *buf_in = NULL;
 265         return HB_WORK_DONE;
 266     }
 267
 268     *buf_out = NULL;
 269
 270     if ( in->start < -1 && pv->pts_next <= 0 )
 271     {
 272         // discard buffers that start before video time 0
 273         return HB_WORK_OK;
 274     }
 275
 276     cur = ( in->start < 0 )? pv->pts_next : in->start;
 277
 278     bufaligned = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 279     pos = 0;
 280     while( pos < in->size )
 281     {
 282         len = av_parser_parse( pv->parser, pv->context,
 283                                &parser_output_buffer, &parser_output_buffer_len,
 284                                in->data + pos, in->size - pos, cur, cur );
 285         out_size = 0;
 286         uncompressed_len = 0;
 287         if (parser_output_buffer_len)
 288         {
 289             out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 290             uncompressed_len = avcodec_decode_audio2( pv->context, bufaligned,
 291                                                       &out_size,
 292                                                       parser_output_buffer,
 293                                                       parser_output_buffer_len );
 294         }
 295         if( out_size )
 296         {
 297             short * s16;
 298             float * fl32;
 299
 300             buf = hb_buffer_init( 2 * out_size );
 301
 302             int sample_size_in_bytes = 2;   // Default to 2 bytes
 303             switch (pv->context->sample_fmt)
 304             {
 305               case SAMPLE_FMT_S16:
 306                 sample_size_in_bytes = 2;
 307                 break;
 308               /* We should handle other formats here - but that needs additional format conversion work below */
 309               /* For now we'll just report the error and try to carry on */
 310               default:
 311                 hb_log("decavcodecWork - Unknown Sample Format from avcodec_decode_audio (%d) !", pv->context->sample_fmt);
 312                 break;
 313             }
 314
 315             buf->start = cur;
 316             buf->stop  = cur + 90000 * ( out_size / (sample_size_in_bytes * pv->context->channels) ) /
 317                          pv->context->sample_rate;
 318             cur = buf->stop;
 319
 320             s16  = bufaligned;
 321             fl32 = (float *) buf->data;
 322             for( i = 0; i < out_size / 2; i++ )
 323             {
 324                 fl32[i] = s16[i];
 325             }
 326
 327             if( last )
 328             {
 329                 last = last->next = buf;
 330             }
 331             else
 332             {
 333                 *buf_out = last = buf;
 334             }
 335         }
 336
 337         pos += len;
 338     }
 339
 340     pv->pts_next = cur;
 341
 342     free( bufaligned );
 343     return HB_WORK_OK;
 344 }
 345
 346 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
 347 {
 348     hb_work_private_t *pv = w->private_data;
 349
 350     memset( info, 0, sizeof(*info) );
 351
 352     if ( pv && pv->context )
 353     {
 354         AVCodecContext *context = pv->context;
 355         info->bitrate = context->bit_rate;
 356         info->rate = context->time_base.num;
 357         info->rate_base = context->time_base.den;
 358         info->profile = context->profile;
 359         info->level = context->level;
 360         return 1;
 361     }
 362     return 0;
 363 }
 364
 365 static const int chan2layout[] = {
 366     HB_INPUT_CH_LAYOUT_MONO,  // We should allow no audio really.
 367     HB_INPUT_CH_LAYOUT_MONO,
 368     HB_INPUT_CH_LAYOUT_STEREO,
 369     HB_INPUT_CH_LAYOUT_2F1R,
 370     HB_INPUT_CH_LAYOUT_2F2R,
 371     HB_INPUT_CH_LAYOUT_3F2R,
 372     HB_INPUT_CH_LAYOUT_4F2R,
 373     HB_INPUT_CH_LAYOUT_STEREO,
 374     HB_INPUT_CH_LAYOUT_STEREO,
 375 };
 376
 377 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 378                              hb_work_info_t *info )
 379 {
 380     hb_work_private_t *pv = w->private_data;
 381     int ret = 0;
 382
 383     memset( info, 0, sizeof(*info) );
 384
 385     if ( pv && pv->context )
 386     {
 387         return decavcodecInfo( w, info );
 388     }
 389     // XXX
 390     // We should parse the bitstream to find its parameters but for right
 391     // now we just return dummy values if there's a codec that will handle it.
 392     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
 393                                                            CODEC_ID_MP2 );
 394     if ( ! codec )
 395     {
 396         // there's no ffmpeg codec for this audio type - give up
 397         return -1;
 398     }
 399
 400     static char codec_name[64];
 401     info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
 402
 403     AVCodecParserContext *parser = av_parser_init( codec->id );
 404     AVCodecContext *context = avcodec_alloc_context();
 405     hb_avcodec_open( context, codec );
 406     uint8_t *buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 407     int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 408     unsigned char *pbuffer;
 409     int pos = 0, pbuffer_size;
 410
 411     while ( pos < buf->size )
 412     {
 413         int len = av_parser_parse( parser, context, &pbuffer, &pbuffer_size,
 414                                    buf->data + pos, buf->size - pos,
 415                                    buf->start, buf->start );
 416         pos += len;
 417         if ( pbuffer_size > 0 )
 418         {
 419             len = avcodec_decode_audio2( context, (int16_t*)buffer, &out_size,
 420                                          pbuffer, pbuffer_size );
 421             if ( len > 0 && context->sample_rate > 0 )
 422             {
 423                 info->bitrate = context->bit_rate;
 424                 info->rate = context->sample_rate;
 425                 info->rate_base = 1;
 426                 info->channel_layout = chan2layout[context->channels & 7];
 427                 ret = 1;
 428                 break;
 429             }
 430         }
 431     }
 432     free( buffer );
 433     av_parser_close( parser );
 434     hb_avcodec_close( context );
 435     return ret;
 436 }
 437
 438 /* -------------------------------------------------------------
 439  * General purpose video decoder using libavcodec
 440  */
 441
 442 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
 443                             int h )
 444 {
 445     if ( dstride == sstride )
 446     {
 447         memcpy( dst, src, dstride * h );
 448         return dst + dstride * h;
 449     }
 450     int lbytes = dstride <= sstride? dstride : sstride;
 451     while ( --h >= 0 )
 452     {
 453         memcpy( dst, src, lbytes );
 454         src += sstride;
 455         dst += dstride;
 456     }
 457     return dst;
 458 }
 459
 460 // copy one video frame into an HB buf. If the frame isn't in our color space
 461 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
 462 // Otherwise just copy the bits.
 463 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 464 {
 465     AVCodecContext *context = pv->context;
 466     int w, h;
 467     if ( ! pv->job )
 468     {
 469         // if the dimensions are odd, drop the lsb since h264 requires that
 470         // both width and height be even.
 471         w = ( context->width >> 1 ) << 1;
 472         h = ( context->height >> 1 ) << 1;
 473     }
 474     else
 475     {
 476         w =  pv->job->title->width;
 477         h =  pv->job->title->height;
 478     }
 479     hb_buffer_t *buf = hb_video_buffer_init( w, h );
 480     uint8_t *dst = buf->data;
 481
 482     if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
 483          h != context->height )
 484     {
 485         // have to convert to our internal color space and/or rescale
 486         AVPicture dstpic;
 487         avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 488
 489         if ( ! pv->sws_context )
 490         {
 491             pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
 492                                               w, h, PIX_FMT_YUV420P,
 493                                               SWS_LANCZOS|SWS_ACCURATE_RND,
 494                                               NULL, NULL, NULL );
 495         }
 496         sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
 497                    dstpic.data, dstpic.linesize );
 498     }
 499     else
 500     {
 501         dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
 502         w = (w + 1) >> 1; h = (h + 1) >> 1;
 503         dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
 504         dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
 505     }
 506     return buf;
 507 }
 508
 509 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
 510 {
 511     hb_work_private_t *pv = context->opaque;
 512     frame->pts = pv->pts;
 513     pv->pts = -1;
 514     return avcodec_default_get_buffer( context, frame );
 515 }
 516
 517 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 518 {
 519     hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
 520     if ( c && c->title )
 521     {
 522         hb_log( "%s: \"%s\" (%d) at frame %u time %lld",
 523                 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
 524     }
 525     else
 526     {
 527         hb_log( "%s: Chapter %d at frame %u time %lld",
 528                 pv->context->codec->name, chap_num, pv->nframes, pts );
 529     }
 530 }
 531
 532 static void flushDelayQueue( hb_work_private_t *pv )
 533 {
 534     hb_buffer_t *buf;
 535     int slot = pv->nframes & (HEAP_SIZE-1);
 536
 537     // flush all the video packets left on our timestamp-reordering delay q
 538     while ( ( buf = pv->delayq[slot] ) != NULL )
 539     {
 540         buf->start = heap_pop( &pv->pts_heap );
 541         hb_list_add( pv->list, buf );
 542         pv->delayq[slot] = NULL;
 543         slot = ( slot + 1 ) & (HEAP_SIZE-1);
 544     }
 545 }
 546
 547 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
 548 {
 549     int got_picture, oldlevel = 0;
 550     AVFrame frame;
 551
 552     if ( global_verbosity_level <= 1 )
 553     {
 554         oldlevel = av_log_get_level();
 555         av_log_set_level( AV_LOG_QUIET );
 556     }
 557     if ( avcodec_decode_video( pv->context, &frame, &got_picture, data, size ) < 0 )
 558     {
 559         ++pv->decode_errors;
 560     }
 561     if ( global_verbosity_level <= 1 )
 562     {
 563         av_log_set_level( oldlevel );
 564     }
 565     if( got_picture )
 566     {
 567         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
 568         // packet had a pts we handed it to av_parser_parse (if the packet had
 569         // no pts we set it to -1 but before the parse we can't distinguish between
 570         // the start of a video frame with no pts & an intermediate packet of
 571         // some frame which never has a pts). we hope that when parse returns
 572         // the frame to us the pts we originally handed it will be in parser->pts.
 573         // we put this pts into pv->pts so that when a avcodec_decode_video
 574         // finally gets around to allocating an AVFrame to hold the decoded
 575         // frame we can stuff that pts into the frame. if all of these relays
 576         // worked at this point frame.pts should hold the frame's pts from the
 577         // original data stream or -1 if it didn't have one. in the latter case
 578         // we generate the next pts in sequence for it.
 579         double frame_dur = pv->duration;
 580         if ( frame_dur <= 0 )
 581         {
 582             frame_dur = 90000. * (double)pv->context->time_base.num /
 583                         (double)pv->context->time_base.den;
 584             pv->duration = frame_dur;
 585         }
 586         if ( frame.repeat_pict )
 587         {
 588             frame_dur += frame.repeat_pict * frame_dur * 0.5;
 589         }
 590         // XXX Unlike every other video decoder, the Raw decoder doesn't
 591         //     use the standard buffer allocation routines so we never
 592         //     get to put a PTS in the frame. Do it now.
 593         if ( pv->context->codec_id == CODEC_ID_RAWVIDEO )
 594         {
 595             frame.pts = pv->pts;
 596             pv->pts = -1;
 597         }
 598         // If there was no pts for this frame, assume constant frame rate
 599         // video & estimate the next frame time from the last & duration.
 600         double pts = frame.pts;
 601         if ( pts < 0 )
 602         {
 603             pts = pv->pts_next;
 604         }
 605         pv->pts_next = pts + frame_dur;
 606
 607         hb_buffer_t *buf;
 608
 609         // if we're doing a scan or this content couldn't have been broken
 610         // by Microsoft we don't worry about timestamp reordering
 611         if ( ! pv->job || ! pv->brokenByMicrosoft )
 612         {
 613             buf = copy_frame( pv, &frame );
 614             buf->start = pts;
 615             hb_list_add( pv->list, buf );
 616             ++pv->nframes;
 617             return got_picture;
 618         }
 619
 620         // XXX This following probably addresses a libavcodec bug but I don't
 621         //     see an easy fix so we workaround it here.
 622         //
 623         // The M$ 'packed B-frames' atrocity results in decoded frames with
 624         // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
 625         // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
 626         // The frames are actually delivered in the right order but with
 627         // the wrong timestamp. To get the correct timestamp attached to
 628         // each frame we have a delay queue (longer than the max number of
 629         // b-frames) & a sorting heap for the timestamps. As each frame
 630         // comes out of the decoder the oldest frame in the queue is removed
 631         // and associated with the smallest timestamp. Then the new frame is
 632         // added to the queue & its timestamp is pushed on the heap.
 633         // This does nothing if the timestamps are correct (i.e., the video
 634         // uses a codec that Micro$oft hasn't broken yet) but the frames
 635         // get timestamped correctly even when M$ has munged them.
 636
 637         // remove the oldest picture from the frame queue (if any) &
 638         // give it the smallest timestamp from our heap. The queue size
 639         // is a power of two so we get the slot of the oldest by masking
 640         // the frame count & this will become the slot of the newest
 641         // once we've removed & processed the oldest.
 642         int slot = pv->nframes & (HEAP_SIZE-1);
 643         if ( ( buf = pv->delayq[slot] ) != NULL )
 644         {
 645             buf->start = heap_pop( &pv->pts_heap );
 646
 647             if ( pv->new_chap && buf->start >= pv->chap_time )
 648             {
 649                 buf->new_chap = pv->new_chap;
 650                 pv->new_chap = 0;
 651                 pv->chap_time = 0;
 652                 log_chapter( pv, buf->new_chap, buf->start );
 653             }
 654             else if ( pv->nframes == 0 )
 655             {
 656                 log_chapter( pv, pv->job->chapter_start, buf->start );
 657             }
 658             hb_list_add( pv->list, buf );
 659         }
 660
 661         // add the new frame to the delayq & push its timestamp on the heap
 662         pv->delayq[slot] = copy_frame( pv, &frame );
 663         heap_push( &pv->pts_heap, pts );
 664
 665         ++pv->nframes;
 666     }
 667
 668     return got_picture;
 669 }
 670
 671 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
 672                          int64_t pts, int64_t dts )
 673 {
 674     /*
 675      * The following loop is a do..while because we need to handle both
 676      * data & the flush at the end (signaled by size=0). At the end there's
 677      * generally a frame in the parser & one or more frames in the decoder
 678      * (depending on the bframes setting).
 679      */
 680     int pos = 0;
 681     do {
 682         uint8_t *pout;
 683         int pout_len;
 684         int len = av_parser_parse( pv->parser, pv->context, &pout, &pout_len,
 685                                    data + pos, size - pos, pts, dts );
 686         pos += len;
 687
 688         if ( pout_len > 0 )
 689         {
 690             pv->pts = pv->parser->pts;
 691             decodeFrame( pv, pout, pout_len );
 692         }
 693     } while ( pos < size );
 694
 695     /* the stuff above flushed the parser, now flush the decoder */
 696     if ( size <= 0 )
 697     {
 698         while ( decodeFrame( pv, NULL, 0 ) )
 699         {
 700         }
 701         flushDelayQueue( pv );
 702     }
 703 }
 704
 705 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 706 {
 707     hb_buffer_t *head = hb_list_item( pv->list, 0 );
 708
 709     if ( head )
 710     {
 711         hb_list_rem( pv->list, head );
 712
 713         hb_buffer_t *last = head, *buf;
 714
 715         while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
 716         {
 717             hb_list_rem( pv->list, buf );
 718             last->next = buf;
 719             last = buf;
 720         }
 721     }
 722     return head;
 723 }
 724
 725
 726 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
 727 {
 728
 729     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
 730     w->private_data = pv;
 731     pv->job   = job;
 732     pv->list = hb_list_init();
 733
 734     int codec_id = w->codec_param;
 735     pv->parser = av_parser_init( codec_id );
 736     pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
 737
 738     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
 739     pv->context->opaque = pv;
 740     pv->context->get_buffer = get_frame_buf;
 741
 742     return 0;
 743 }
 744
 745 static int next_hdr( hb_buffer_t *in, int offset )
 746 {
 747     uint8_t *dat = in->data;
 748     uint16_t last2 = 0xffff;
 749     for ( ; in->size - offset > 1; ++offset )
 750     {
 751         if ( last2 == 0 && dat[offset] == 0x01 )
 752             // found an mpeg start code
 753             return offset - 2;
 754
 755         last2 = ( last2 << 8 ) | dat[offset];
 756     }
 757
 758     return -1;
 759 }
 760
 761 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
 762 {
 763     if ( in->size - offset < 4 )
 764         // not enough room for an mpeg start code
 765         return -1;
 766
 767     for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
 768     {
 769         if ( in->data[offset+3] == hdr_type )
 770             // found it
 771             break;
 772     }
 773     return offset;
 774 }
 775
 776 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
 777 {
 778     hb_work_private_t *pv = w->private_data;
 779
 780     // we can't call the avstream funcs but the read_header func in the
 781     // AVInputFormat may set up some state in the AVContext. In particular
 782     // vc1t_read_header allocates 'extradata' to deal with header issues
 783     // related to Microsoft's bizarre engineering notions. We alloc a chunk
 784     // of space to make vc1 work then associate the codec with the context.
 785     if ( w->codec_param != CODEC_ID_VC1 )
 786     {
 787         // we haven't been inflicted with M$ - allocate a little space as
 788         // a marker and return success.
 789         pv->context->extradata_size = 16;
 790         pv->context->extradata = av_malloc(pv->context->extradata_size);
 791         return 0;
 792     }
 793
 794     // find the start and and of the sequence header
 795     int shdr, shdr_end;
 796     if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
 797     {
 798         // didn't find start of seq hdr
 799         return 1;
 800     }
 801     if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
 802     {
 803         shdr_end = in->size;
 804     }
 805     shdr_end -= shdr;
 806
 807     // find the start and and of the entry point header
 808     int ehdr, ehdr_end;
 809     if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
 810     {
 811         // didn't find start of entry point hdr
 812         return 1;
 813     }
 814     if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
 815     {
 816         ehdr_end = in->size;
 817     }
 818     ehdr_end -= ehdr;
 819
 820     // found both headers - allocate an extradata big enough to hold both
 821     // then copy them into it.
 822     pv->context->extradata_size = shdr_end + ehdr_end;
 823     pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
 824     memcpy( pv->context->extradata, in->data + shdr, shdr_end );
 825     memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
 826     memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
 827     return 0;
 828 }
 829
 830 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 831                             hb_buffer_t ** buf_out )
 832 {
 833     hb_work_private_t *pv = w->private_data;
 834     hb_buffer_t *in = *buf_in;
 835     int64_t pts = AV_NOPTS_VALUE;
 836     int64_t dts = pts;
 837
 838     *buf_in = NULL;
 839
 840     /* if we got an empty buffer signaling end-of-stream send it downstream */
 841     if ( in->size == 0 )
 842     {
 843         decodeVideo( pv, in->data, in->size, pts, dts );
 844         hb_list_add( pv->list, in );
 845         *buf_out = link_buf_list( pv );
 846         return HB_WORK_DONE;
 847     }
 848
 849     // if this is the first frame open the codec (we have to wait for the
 850     // first frame because of M$ VC1 braindamage).
 851     if ( pv->context->extradata_size == 0 )
 852     {
 853         if ( setup_extradata( w, in ) )
 854         {
 855             // we didn't find the headers needed to set up extradata.
 856             // the codec will abort if we open it so just free the buf
 857             // and hope we eventually get the info we need.
 858             hb_buffer_close( &in );
 859             return HB_WORK_OK;
 860         }
 861         AVCodec *codec = avcodec_find_decoder( w->codec_param );
 862         // There's a mis-feature in ffmpeg that causes the context to be
 863         // incorrectly initialized the 1st time avcodec_open is called.
 864         // If you close it and open a 2nd time, it finishes the job.
 865         hb_avcodec_open( pv->context, codec );
 866         hb_avcodec_close( pv->context );
 867         hb_avcodec_open( pv->context, codec );
 868     }
 869
 870     if( in->start >= 0 )
 871     {
 872         pts = in->start;
 873         dts = in->renderOffset;
 874     }
 875     if ( in->new_chap )
 876     {
 877         pv->new_chap = in->new_chap;
 878         pv->chap_time = pts >= 0? pts : pv->pts_next;
 879     }
 880     decodeVideo( pv, in->data, in->size, pts, dts );
 881     hb_buffer_close( &in );
 882     *buf_out = link_buf_list( pv );
 883     return HB_WORK_OK;
 884 }
 885
 886 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
 887 {
 888     hb_work_private_t *pv = w->private_data;
 889
 890     memset( info, 0, sizeof(*info) );
 891
 892     if ( pv && pv->context )
 893     {
 894         AVCodecContext *context = pv->context;
 895         info->bitrate = context->bit_rate;
 896         info->width = context->width;
 897         info->height = context->height;
 898
 899         /* ffmpeg gives the frame rate in frames per second while HB wants
 900          * it in units of the 27MHz MPEG clock. */
 901         info->rate = 27000000;
 902         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
 903                           context->time_base.den;
 904         if ( context->ticks_per_frame > 1 )
 905         {
 906             // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
 907             // field rate rather than frame rate so convert back to frames.
 908             info->rate_base *= context->ticks_per_frame;
 909         }
 910
 911         /* Sometimes there's no pixel aspect set in the source. In that case,
 912            assume a 1:1 PAR. Otherwise, preserve the source PAR.             */
 913         info->pixel_aspect_width = context->sample_aspect_ratio.num ?
 914                                         context->sample_aspect_ratio.num : 1;
 915         info->pixel_aspect_height = context->sample_aspect_ratio.den ?
 916                                         context->sample_aspect_ratio.den : 1;
 917
 918         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
 919          * Display Aspect Ratio so we convert by scaling by the Storage
 920          * Aspect Ratio (w/h). We do the calc in floating point to get the
 921          * rounding right. */
 922         info->aspect = (double)info->pixel_aspect_width *
 923                        (double)context->width /
 924                        (double)info->pixel_aspect_height /
 925                        (double)context->height;
 926
 927         info->profile = context->profile;
 928         info->level = context->level;
 929         info->name = context->codec->name;
 930         return 1;
 931     }
 932     return 0;
 933 }
 934
 935 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 936                              hb_work_info_t *info )
 937 {
 938     return 0;
 939 }
 940
 941 hb_work_object_t hb_decavcodecv =
 942 {
 943     WORK_DECAVCODECV,
 944     "Video decoder (libavcodec)",
 945     decavcodecvInit,
 946     decavcodecvWork,
 947     decavcodecClose,
 948     decavcodecvInfo,
 949     decavcodecvBSInfo
 950 };
 951
 952
 953 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
 954 // includes a parser and passes information from the parser to the decoder
 955 // via a codec context kept in the AVStream of the reader's AVFormatContext.
 956 // We *have* to use that codec context to decode the stream or we'll get
 957 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
 958 // codec context in our codec_param.
 959
// this routine gets the appropriate context pointer from the ffmpeg
// stream reader. it can't be called until we get the first buffer because
// we can't guarantee that the reader will be called before our init
// routine and if our init is called first we'll get a pointer to the
// old scan stream (which has already been closed).
 965 static void init_ffmpeg_context( hb_work_object_t *w )
 966 {
 967     hb_work_private_t *pv = w->private_data;
 968     pv->context = hb_ffmpeg_context( w->codec_param );
 969
 970     // during scan the decoder gets closed & reopened which will
 971     // close the codec so reopen it if it's not there
 972     if ( ! pv->context->codec )
 973     {
 974         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
 975         hb_avcodec_open( pv->context, codec );
 976     }
 977     // set up our best guess at the frame duration.
 978     // the frame rate in the codec is usually bogus but it's sometimes
 979     // ok in the stream.
 980     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 981
 982     if ( st->nb_frames && st->duration )
 983     {
 984         // compute the average frame duration from the total number
 985         // of frames & the total duration.
 986         pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
 987                        ( (double)st->nb_frames * (double)st->time_base.den );
 988     }
 989     else
 990     {
 991         // XXX We don't have a frame count or duration so try to use the
 992         // far less reliable time base info in the stream.
 993         // Because the time bases are so screwed up, we only take values
 994         // in the range 8fps - 64fps.
 995         AVRational tb;
 996         if ( st->time_base.num * 64 > st->time_base.den &&
 997              st->time_base.den > st->time_base.num * 8 )
 998         {
 999             tb = st->time_base;
1000         }
1001         else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
1002                   st->r_frame_rate.num > st->r_frame_rate.den * 8 )
1003         {
1004             tb.num = st->r_frame_rate.den;
1005             tb.den = st->r_frame_rate.num;
1006         }
1007         else
1008         {
1009             tb.num = 1001;  /*XXX*/
1010             tb.den = 24000; /*XXX*/
1011         }
1012         pv->duration =  (double)tb.num / (double)tb.den;
1013     }
1014     pv->duration *= 90000.;
1015
1016     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
1017     pv->context->opaque = pv;
1018     pv->context->get_buffer = get_frame_buf;
1019
1020     // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1021     // b-frames abortion that messes up frame ordering and timestamps.
1022     // XXX ffmpeg knows which streams are broken but doesn't expose the
1023     //     info externally. We should patch ffmpeg to add a flag to the
1024     //     codec context for this but until then we mark all ffmpeg streams
1025     //     as suspicious.
1026     pv->brokenByMicrosoft = 1;
1027 }
1028
1029 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1030 {
1031     // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1032     // will seg fault in odd, data dependent ways if it's not there. (my guess
1033     // is this is a case of a local performance optimization creating a global
1034     // performance degradation since all the time wasted by extraneous data
1035     // copies & memory zeroing has to be huge compared to the minor reduction
1036     // in inner-loop instructions this affords - modern cpus bottleneck on
1037     // memory bandwidth not instruction bandwidth).
1038     if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1039     {
1040         // have to realloc to add the padding
1041         hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
1042     }
1043     memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
1044 }
1045
1046 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
1047 {
1048
1049     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1050     w->private_data = pv;
1051     pv->job   = job;
1052     pv->list = hb_list_init();
1053     pv->pts_next = -1;
1054     pv->pts = -1;
1055     return 0;
1056 }
1057
1058 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1059                              hb_buffer_t ** buf_out )
1060 {
1061     hb_work_private_t *pv = w->private_data;
1062     if ( ! pv->context )
1063     {
1064         init_ffmpeg_context( w );
1065     }
1066     hb_buffer_t *in = *buf_in;
1067     *buf_in = NULL;
1068
1069     /* if we got an empty buffer signaling end-of-stream send it downstream */
1070     if ( in->size == 0 )
1071     {
1072         /* flush any frames left in the decoder */
1073         while ( decodeFrame( pv, NULL, 0 ) )
1074         {
1075         }
1076         flushDelayQueue( pv );
1077         hb_list_add( pv->list, in );
1078         *buf_out = link_buf_list( pv );
1079         return HB_WORK_DONE;
1080     }
1081
1082     int64_t pts = in->start;
1083     if( pts >= 0 )
1084     {
1085         // use the first timestamp as our 'next expected' pts
1086         if ( pv->pts_next < 0 )
1087         {
1088             pv->pts_next = pts;
1089         }
1090         pv->pts = pts;
1091     }
1092
1093     if ( in->new_chap )
1094     {
1095         pv->new_chap = in->new_chap;
1096         pv->chap_time = pts >= 0? pts : pv->pts_next;
1097     }
1098     prepare_ffmpeg_buffer( in );
1099     decodeFrame( pv, in->data, in->size );
1100     hb_buffer_close( &in );
1101     *buf_out = link_buf_list( pv );
1102     return HB_WORK_OK;
1103 }
1104
1105 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1106 {
1107     if ( decavcodecvInfo( w, info ) )
1108     {
1109         hb_work_private_t *pv = w->private_data;
1110         if ( ! pv->context )
1111         {
1112             init_ffmpeg_context( w );
1113         }
1114         // we have the frame duration in units of the 90KHz pts clock but
1115         // need it in units of the 27MHz MPEG clock. */
1116         info->rate = 27000000;
1117         info->rate_base = pv->duration * 300.;
1118         return 1;
1119     }
1120     return 0;
1121 }
1122
1123 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
1124 {
1125     AVCodecContext *context = pv->context;
1126     int pos = 0;
1127
1128     while ( pos < size )
1129     {
1130         int16_t *buffer = pv->buffer;
1131         if ( buffer == NULL )
1132         {
1133             pv->buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1134             buffer = pv->buffer;
1135         }
1136         int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1137         int len = avcodec_decode_audio2( context, buffer, &out_size,
1138                                          data + pos, size - pos );
1139         if ( len <= 0 )
1140         {
1141             return;
1142         }
1143         pos += len;
1144         if( out_size > 0 )
1145         {
1146             // We require signed 16-bit ints for the output format. If
1147             // we got something different convert it.
1148             if ( context->sample_fmt != SAMPLE_FMT_S16 )
1149             {
1150                 // Note: av_audio_convert seems to be a work-in-progress but
1151                 //       looks like it will eventually handle general audio
1152                 //       mixdowns which would allow us much more flexibility
1153                 //       in handling multichannel audio in HB. If we were doing
1154                 //       anything more complicated than a one-for-one format
1155                 //       conversion we'd probably want to cache the converter
1156                 //       context in the pv.
1157                 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1158                 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1159                                                               context->sample_fmt, 1,
1160                                                               NULL, 0 );
1161                 // get output buffer size (in 2-byte samples) then malloc a buffer
1162                 out_size = ( out_size * 2 ) / isamp;
1163                 buffer = malloc( out_size );
1164
1165                 // we're doing straight sample format conversion which behaves as if
1166                 // there were only one channel.
1167                 const void * const ibuf[6] = { pv->buffer };
1168                 void * const obuf[6] = { buffer };
1169                 const int istride[6] = { isamp };
1170                 const int ostride[6] = { 2 };
1171
1172                 av_audio_convert( ctx, obuf, ostride, ibuf, istride, out_size >> 1 );
1173                 av_audio_convert_free( ctx );
1174             }
1175             hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
1176
1177             // convert from bytes to total samples
1178             out_size >>= 1;
1179
1180             double pts = pv->pts_next;
1181             buf->start = pts;
1182             pts += out_size * pv->duration;
1183             buf->stop  = pts;
1184             pv->pts_next = pts;
1185
1186             float *fl32 = (float *)buf->data;
1187             int i;
1188             for( i = 0; i < out_size; ++i )
1189             {
1190                 fl32[i] = buffer[i];
1191             }
1192             hb_list_add( pv->list, buf );
1193
1194             // if we allocated a buffer for sample format conversion, free it
1195             if ( buffer != pv->buffer )
1196             {
1197                 free( buffer );
1198             }
1199         }
1200     }
1201 }
1202
1203 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1204                     hb_buffer_t **buf_out )
1205 {
1206     if ( (*buf_in)->size <= 0 )
1207     {
1208         /* EOF on input stream - send it downstream & say that we're done */
1209         *buf_out = *buf_in;
1210         *buf_in = NULL;
1211         return HB_WORK_DONE;
1212     }
1213
1214     hb_work_private_t *pv = w->private_data;
1215
1216     if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1217     {
1218         // discard buffers that start before video time 0
1219         *buf_out = NULL;
1220         return HB_WORK_OK;
1221     }
1222
1223     if ( ! pv->context )
1224     {
1225         init_ffmpeg_context( w );
1226         // duration is a scaling factor to go from #bytes in the decoded
1227         // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1228         // total samples to per-channel samples. 'sample_rate' converts
1229         // per-channel samples to seconds per sample and the 90000
1230         // is mpeg ticks per second.
1231         pv->duration = 90000. /
1232                     (double)( pv->context->sample_rate * pv->context->channels );
1233     }
1234     hb_buffer_t *in = *buf_in;
1235
1236     // if the packet has a timestamp use it if we don't have a timestamp yet
1237     // or if there's been a timing discontinuity of more than 100ms.
1238     if ( in->start >= 0 &&
1239          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1240     {
1241         pv->pts_next = in->start;
1242     }
1243     prepare_ffmpeg_buffer( in );
1244     decodeAudio( pv, in->data, in->size );
1245     *buf_out = link_buf_list( pv );
1246
1247     return HB_WORK_OK;
1248 }
1249
1250 hb_work_object_t hb_decavcodecvi =
1251 {
1252     WORK_DECAVCODECVI,
1253     "Video decoder (ffmpeg streams)",
1254     decavcodecviInit,
1255     decavcodecviWork,
1256     decavcodecClose,
1257     decavcodecviInfo,
1258     decavcodecvBSInfo
1259 };
1260
1261 hb_work_object_t hb_decavcodecai =
1262 {
1263     WORK_DECAVCODECAI,
1264     "Audio decoder (ffmpeg streams)",
1265     decavcodecviInit,
1266     decavcodecaiWork,
1267     decavcodecClose,
1268     decavcodecInfo,
1269     decavcodecBSInfo
1270 };