libhb/decavcodec.c

   1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 /* This module is Handbrake's interface to the ffmpeg decoder library
   8    (libavcodec & small parts of libavformat). It contains four Handbrake
   9    "work objects":
  10
  11     decavcodec  connects HB to an ffmpeg audio decoder
  12     decavcodecv connects HB to an ffmpeg video decoder
  13
  14         (Two different routines are needed because the ffmpeg library
  15         has different decoder calling conventions for audio & video.
  16         The audio decoder should have had its name changed to "decavcodeca"
  17         but I got lazy.) These work objects are self-contained & follow all
  18         of HB's conventions for a decoder module. They can be used like
  19         any other HB decoder (deca52, decmpeg2, etc.).
  20
  21     decavcodecai "internal" (incestuous?) version of decavcodec
  22     decavcodecvi "internal" (incestuous?) version of decavcodecv
  23
   24         These routines are functionally equivalent to the routines above but
  25         can only be used by the ffmpeg-based stream reader in libhb/stream.c.
  26         The reason they exist is because the ffmpeg library leaves some of
  27         the information needed by the decoder in the AVStream (the data
  28         structure used by the stream reader) and we need to retrieve it
  29         to successfully decode frames. But in HB the reader and decoder
  30         modules are in completely separate threads and nothing goes between
  31         them but hb_buffers containing frames to be decoded. I.e., there's
  32         no easy way for the ffmpeg stream reader to pass a pointer to its
  33         AVStream over to the ffmpeg video or audio decoder. So the *i work
  34         objects use a private back door to the stream reader to get access
  35         to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
  36         and the codec_param passed to these work objects is the key to this
  37         back door (it's basically an index that allows the correct AVStream
  38         to be retrieved).
  39
  40     The normal & *i objects share a lot of code (the basic frame decoding
  41     and bitstream info code is factored out into subroutines that can be
  42     called by either) but the top level routines of the *i objects
  43     (decavcodecviWork, decavcodecviInfo, etc.) are different because:
  44      1) they *have* to use the AVCodecContext that's contained in the
  45         reader's AVStream rather than just allocating & using their own,
  46      2) the Info routines have access to stuff kept in the AVStream in addition
  47         to stuff kept in the AVCodecContext. This shouldn't be necessary but
  48         crucial information like video frame rate that should be in the
  49         AVCodecContext is either missing or wrong in the version of ffmpeg
  50         we're currently using.
  51
  52     A consequence of the above is that the non-i work objects *can't* use
  53     information from the AVStream because there isn't one - they get their
  54     data from either the dvd reader or the mpeg reader, not the ffmpeg stream
  55     reader. That means that they have to make up for deficiencies in the
  56     AVCodecContext info by using stuff kept in the HB "title" struct. It
  57     also means that ffmpeg codecs that randomly scatter state needed by
  58     the decoder across both the AVCodecContext & the AVStream (e.g., the
  59     VC1 decoder) can't easily be used by the HB mpeg stream reader.
  60  */
  61
  62 #include "hb.h"
  63 #include "hbffmpeg.h"
  64 #include "libavcodec/audioconvert.h"
  65
/* Forward declarations of the audio ("decavcodec") work object entry points.
   They are needed because the hb_decavcodec table that references them is
   defined before the functions themselves. */
static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
static void decavcodecClose( hb_work_object_t * );
static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );
  71
/* Work object that exposes the libavcodec audio decoder to HB.
   The initializer is positional: a work id, a display name, then the five
   entry points declared above. NOTE(review): the field order is dictated by
   hb_work_object_t, which is declared outside this file - confirm against
   that declaration before reordering anything here. */
hb_work_object_t hb_decavcodec =
{
    WORK_DECAVCODEC,
    "MPGA decoder (libavcodec)",
    decavcodecInit,
    decavcodecWork,
    decavcodecClose,
    decavcodecInfo,
    decavcodecBSInfo
};
  82
// Capacity of the timestamp heap and of the frame delay queue. The delay
// queue slot is computed with "& (HEAP_SIZE-1)", so this must stay a power
// of two.
#define HEAP_SIZE 8
// Binary min-heap of timestamps (see heap_push / heap_pop).
typedef struct {
    // there are nheap items on the heap indexed 1..nheap (i.e., top of
    // heap is 1). The 0th slot is unused - a marker is put there to check
    // for overwrite errs.
    // NOTE(review): heap_push and heap_pop never write or check h[0];
    // confirm whether the marker described above is set anywhere.
    int64_t h[HEAP_SIZE+1];
    int     nheap;      // number of valid entries in h[1..nheap]
} pts_heap_t;
  91
// Per-work-object private state shared by the audio and video decoders in
// this file. It is allocated zero-filled (calloc) by the Init routines.
struct hb_work_private_s
{
    hb_job_t        *job;       // job passed to Init; the video code treats NULL as "scan"
    AVCodecContext  *context;   // libavcodec decoder context
    AVCodecParserContext *parser; // splits the input byte stream into frames
    hb_list_t       *list;      // decoded video buffers waiting for link_buf_list()
    double          duration;   // frame duration (for video)
    double          pts_next;   // next pts we expect to generate
    int64_t         pts;        // (video) pts passing from parser to decoder
    int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
    int             new_chap;   // output chapter mark pending
    uint32_t        nframes;    // video frames decoded so far; also selects the delayq slot
    uint32_t        ndrops;     // reported by decavcodecClose
    uint32_t        decode_errors; // bumped when avcodec_decode_video() returns < 0
    int             brokenByMicrosoft; // video stream may contain packed b-frames
    hb_buffer_t*    delayq[HEAP_SIZE]; // ring of frames waiting for a reordered timestamp
    pts_heap_t      pts_heap;   // timestamps belonging to the frames held in delayq
    void*           buffer;     // released with free() in decavcodecClose
    struct SwsContext *sws_context; // if we have to rescale or convert color space
};
 112
 113 static int64_t heap_pop( pts_heap_t *heap )
 114 {
 115     int64_t result;
 116
 117     if ( heap->nheap <= 0 )
 118     {
 119         return -1;
 120     }
 121
 122     // return the top of the heap then put the bottom element on top,
 123     // decrease the heap size by one & rebalence the heap.
 124     result = heap->h[1];
 125
 126     int64_t v = heap->h[heap->nheap--];
 127     int parent = 1;
 128     int child = parent << 1;
 129     while ( child <= heap->nheap )
 130     {
 131         // find the smallest of the two children of parent
 132         if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
 133             ++child;
 134
 135         if (v <= heap->h[child])
 136             // new item is smaller than either child so it's the new parent.
 137             break;
 138
 139         // smallest child is smaller than new item so move it up then
 140         // check its children.
 141         int64_t hp = heap->h[child];
 142         heap->h[parent] = hp;
 143         parent = child;
 144         child = parent << 1;
 145     }
 146     heap->h[parent] = v;
 147     return result;
 148 }
 149
 150 static void heap_push( pts_heap_t *heap, int64_t v )
 151 {
 152     if ( heap->nheap < HEAP_SIZE )
 153     {
 154         ++heap->nheap;
 155     }
 156
 157     // stick the new value on the bottom of the heap then bubble it
 158     // up to its correct spot.
 159         int child = heap->nheap;
 160         while (child > 1) {
 161                 int parent = child >> 1;
 162                 if (heap->h[parent] <= v)
 163                         break;
 164                 // move parent down
 165                 int64_t hp = heap->h[parent];
 166                 heap->h[child] = hp;
 167                 child = parent;
 168         }
 169         heap->h[child] = v;
 170 }
 171
 172
 173 /***********************************************************************
 174  * hb_work_decavcodec_init
 175  ***********************************************************************
 176  *
 177  **********************************************************************/
 178 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 179 {
 180     AVCodec * codec;
 181
 182     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 183     w->private_data = pv;
 184
 185     pv->job   = job;
 186
 187     int codec_id = w->codec_param;
 188     /*XXX*/
 189     if ( codec_id == 0 )
 190         codec_id = CODEC_ID_MP2;
 191
 192     codec = avcodec_find_decoder( codec_id );
 193     pv->parser = av_parser_init( codec_id );
 194
 195     pv->context = avcodec_alloc_context();
 196     hb_avcodec_open( pv->context, codec );
 197
 198     return 0;
 199 }
 200
 201 /***********************************************************************
 202  * Close
 203  ***********************************************************************
 204  *
 205  **********************************************************************/
 206 static void decavcodecClose( hb_work_object_t * w )
 207 {
 208     hb_work_private_t * pv = w->private_data;
 209
 210     if ( pv )
 211     {
 212         if ( pv->job && pv->context && pv->context->codec )
 213         {
 214             hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
 215                     pv->context->codec->name, pv->nframes, pv->decode_errors,
 216                     pv->ndrops );
 217         }
 218         if ( pv->sws_context )
 219         {
 220             sws_freeContext( pv->sws_context );
 221         }
 222         if ( pv->parser )
 223         {
 224             av_parser_close(pv->parser);
 225         }
 226         if ( pv->context && pv->context->codec )
 227         {
 228             hb_avcodec_close( pv->context );
 229         }
 230         if ( pv->list )
 231         {
 232             hb_list_close( &pv->list );
 233         }
 234         if ( pv->buffer )
 235         {
 236             free( pv->buffer );
 237             pv->buffer = NULL;
 238         }
 239         free( pv );
 240         w->private_data = NULL;
 241     }
 242 }
 243
 244 /***********************************************************************
 245  * Work
 246  ***********************************************************************
 247  *
 248  **********************************************************************/
 249 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 250                     hb_buffer_t ** buf_out )
 251 {
 252     hb_work_private_t * pv = w->private_data;
 253     hb_buffer_t * in = *buf_in, * buf, * last = NULL;
 254     int   pos, len, out_size, i, uncompressed_len;
 255     short buffer[AVCODEC_MAX_AUDIO_FRAME_SIZE];
 256     uint64_t cur;
 257     unsigned char *parser_output_buffer;
 258     int parser_output_buffer_len;
 259
 260     if ( (*buf_in)->size <= 0 )
 261     {
 262         /* EOF on input stream - send it downstream & say that we're done */
 263         *buf_out = *buf_in;
 264         *buf_in = NULL;
 265         return HB_WORK_DONE;
 266     }
 267
 268     *buf_out = NULL;
 269
 270     if ( in->start < -1 && pv->pts_next <= 0 )
 271     {
 272         // discard buffers that start before video time 0
 273         return HB_WORK_OK;
 274     }
 275
 276     cur = ( in->start < 0 )? pv->pts_next : in->start;
 277
 278     pos = 0;
 279     while( pos < in->size )
 280     {
 281         len = av_parser_parse( pv->parser, pv->context,
 282                                &parser_output_buffer, &parser_output_buffer_len,
 283                                in->data + pos, in->size - pos, cur, cur );
 284         out_size = 0;
 285         uncompressed_len = 0;
 286         if (parser_output_buffer_len)
 287         {
 288             out_size = sizeof(buffer);
 289             uncompressed_len = avcodec_decode_audio2( pv->context, buffer,
 290                                                       &out_size,
 291                                                       parser_output_buffer,
 292                                                       parser_output_buffer_len );
 293         }
 294         if( out_size )
 295         {
 296             short * s16;
 297             float * fl32;
 298
 299             buf = hb_buffer_init( 2 * out_size );
 300
 301             int sample_size_in_bytes = 2;   // Default to 2 bytes
 302             switch (pv->context->sample_fmt)
 303             {
 304               case SAMPLE_FMT_S16:
 305                 sample_size_in_bytes = 2;
 306                 break;
 307               /* We should handle other formats here - but that needs additional format conversion work below */
 308               /* For now we'll just report the error and try to carry on */
 309               default:
 310                 hb_log("decavcodecWork - Unknown Sample Format from avcodec_decode_audio (%d) !", pv->context->sample_fmt);
 311                 break;
 312             }
 313
 314             buf->start = cur;
 315             buf->stop  = cur + 90000 * ( out_size / (sample_size_in_bytes * pv->context->channels) ) /
 316                          pv->context->sample_rate;
 317             cur = buf->stop;
 318
 319             s16  = buffer;
 320             fl32 = (float *) buf->data;
 321             for( i = 0; i < out_size / 2; i++ )
 322             {
 323                 fl32[i] = s16[i];
 324             }
 325
 326             if( last )
 327             {
 328                 last = last->next = buf;
 329             }
 330             else
 331             {
 332                 *buf_out = last = buf;
 333             }
 334         }
 335
 336         pos += len;
 337     }
 338
 339     pv->pts_next = cur;
 340
 341     return HB_WORK_OK;
 342 }
 343
 344 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
 345 {
 346     hb_work_private_t *pv = w->private_data;
 347
 348     memset( info, 0, sizeof(*info) );
 349
 350     if ( pv && pv->context )
 351     {
 352         AVCodecContext *context = pv->context;
 353         info->bitrate = context->bit_rate;
 354         info->rate = context->time_base.num;
 355         info->rate_base = context->time_base.den;
 356         info->profile = context->profile;
 357         info->level = context->level;
 358         return 1;
 359     }
 360     return 0;
 361 }
 362
// Maps a channel count (the array index) to an HB input channel layout.
// decavcodecBSInfo indexes this table with (channels & 7), so only entries
// 0..7 are reachable from there; a count of 8 or more wraps around.
static const int chan2layout[] = {
    HB_INPUT_CH_LAYOUT_MONO,  // We should allow no audio really.
    HB_INPUT_CH_LAYOUT_MONO,
    HB_INPUT_CH_LAYOUT_STEREO,
    HB_INPUT_CH_LAYOUT_2F1R,
    HB_INPUT_CH_LAYOUT_2F2R,
    HB_INPUT_CH_LAYOUT_3F2R,
    HB_INPUT_CH_LAYOUT_4F2R,
    HB_INPUT_CH_LAYOUT_STEREO,
    HB_INPUT_CH_LAYOUT_STEREO,
};
 374
 375 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 376                              hb_work_info_t *info )
 377 {
 378     hb_work_private_t *pv = w->private_data;
 379     int ret = 0;
 380
 381     memset( info, 0, sizeof(*info) );
 382
 383     if ( pv && pv->context )
 384     {
 385         return decavcodecInfo( w, info );
 386     }
 387     // XXX
 388     // We should parse the bitstream to find its parameters but for right
 389     // now we just return dummy values if there's a codec that will handle it.
 390     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
 391                                                            CODEC_ID_MP2 );
 392     if ( ! codec )
 393     {
 394         // there's no ffmpeg codec for this audio type - give up
 395         return -1;
 396     }
 397
 398     static char codec_name[64];
 399     info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
 400
 401     AVCodecParserContext *parser = av_parser_init( codec->id );
 402     AVCodecContext *context = avcodec_alloc_context();
 403     hb_avcodec_open( context, codec );
 404 #if defined( SYS_CYGWIN )
 405     uint8_t *buffer = memalign(16, AVCODEC_MAX_AUDIO_FRAME_SIZE);
 406 #else
 407     uint8_t *buffer = malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 408 #endif
 409     int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 410     unsigned char *pbuffer;
 411     int pos = 0, pbuffer_size;
 412
 413     while ( pos < buf->size )
 414     {
 415         int len = av_parser_parse( parser, context, &pbuffer, &pbuffer_size,
 416                                    buf->data + pos, buf->size - pos,
 417                                    buf->start, buf->start );
 418         pos += len;
 419         if ( pbuffer_size > 0 )
 420         {
 421             len = avcodec_decode_audio2( context, (int16_t*)buffer, &out_size,
 422                                          pbuffer, pbuffer_size );
 423             if ( len > 0 && context->sample_rate > 0 )
 424             {
 425                 info->bitrate = context->bit_rate;
 426                 info->rate = context->sample_rate;
 427                 info->rate_base = 1;
 428                 info->channel_layout = chan2layout[context->channels & 7];
 429                 ret = 1;
 430                 break;
 431             }
 432         }
 433     }
 434     free( buffer );
 435     av_parser_close( parser );
 436     hb_avcodec_close( context );
 437     return ret;
 438 }
 439
 440 /* -------------------------------------------------------------
 441  * General purpose video decoder using libavcodec
 442  */
 443
 444 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
 445                             int h )
 446 {
 447     if ( dstride == sstride )
 448     {
 449         memcpy( dst, src, dstride * h );
 450         return dst + dstride * h;
 451     }
 452     int lbytes = dstride <= sstride? dstride : sstride;
 453     while ( --h >= 0 )
 454     {
 455         memcpy( dst, src, lbytes );
 456         src += sstride;
 457         dst += dstride;
 458     }
 459     return dst;
 460 }
 461
 462 // copy one video frame into an HB buf. If the frame isn't in our color space
 463 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
 464 // Otherwise just copy the bits.
 465 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 466 {
 467     AVCodecContext *context = pv->context;
 468     int w, h;
 469     if ( ! pv->job )
 470     {
 471         // if the dimensions are odd, drop the lsb since h264 requires that
 472         // both width and height be even.
 473         w = ( context->width >> 1 ) << 1;
 474         h = ( context->height >> 1 ) << 1;
 475     }
 476     else
 477     {
 478         w =  pv->job->title->width;
 479         h =  pv->job->title->height;
 480     }
 481     hb_buffer_t *buf = hb_video_buffer_init( w, h );
 482     uint8_t *dst = buf->data;
 483
 484     if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
 485          h != context->height )
 486     {
 487         // have to convert to our internal color space and/or rescale
 488         AVPicture dstpic;
 489         avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 490
 491         if ( ! pv->sws_context )
 492         {
 493             pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
 494                                               w, h, PIX_FMT_YUV420P,
 495                                               SWS_LANCZOS|SWS_ACCURATE_RND,
 496                                               NULL, NULL, NULL );
 497         }
 498         sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
 499                    dstpic.data, dstpic.linesize );
 500     }
 501     else
 502     {
 503         dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
 504         w = (w + 1) >> 1; h = (h + 1) >> 1;
 505         dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
 506         dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
 507     }
 508     return buf;
 509 }
 510
 511 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
 512 {
 513     hb_work_private_t *pv = context->opaque;
 514     frame->pts = pv->pts;
 515     pv->pts = -1;
 516     return avcodec_default_get_buffer( context, frame );
 517 }
 518
 519 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 520 {
 521     hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
 522     if ( c && c->title )
 523     {
 524         hb_log( "%s: \"%s\" (%d) at frame %u time %lld",
 525                 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
 526     }
 527     else
 528     {
 529         hb_log( "%s: Chapter %d at frame %u time %lld",
 530                 pv->context->codec->name, chap_num, pv->nframes, pts );
 531     }
 532 }
 533
 534 static void flushDelayQueue( hb_work_private_t *pv )
 535 {
 536     hb_buffer_t *buf;
 537     int slot = pv->nframes & (HEAP_SIZE-1);
 538
 539     // flush all the video packets left on our timestamp-reordering delay q
 540     while ( ( buf = pv->delayq[slot] ) != NULL )
 541     {
 542         buf->start = heap_pop( &pv->pts_heap );
 543         hb_list_add( pv->list, buf );
 544         pv->delayq[slot] = NULL;
 545         slot = ( slot + 1 ) & (HEAP_SIZE-1);
 546     }
 547 }
 548
 549 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
 550 {
 551     int got_picture, oldlevel = 0;
 552     AVFrame frame;
 553
 554     if ( global_verbosity_level <= 1 )
 555     {
 556         oldlevel = av_log_get_level();
 557         av_log_set_level( AV_LOG_QUIET );
 558     }
 559     if ( avcodec_decode_video( pv->context, &frame, &got_picture, data, size ) < 0 )
 560     {
 561         ++pv->decode_errors;
 562     }
 563     if ( global_verbosity_level <= 1 )
 564     {
 565         av_log_set_level( oldlevel );
 566     }
 567     if( got_picture )
 568     {
 569         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
 570         // packet had a pts we handed it to av_parser_parse (if the packet had
 571         // no pts we set it to -1 but before the parse we can't distinguish between
 572         // the start of a video frame with no pts & an intermediate packet of
 573         // some frame which never has a pts). we hope that when parse returns
 574         // the frame to us the pts we originally handed it will be in parser->pts.
 575         // we put this pts into pv->pts so that when a avcodec_decode_video
 576         // finally gets around to allocating an AVFrame to hold the decoded
 577         // frame we can stuff that pts into the frame. if all of these relays
 578         // worked at this point frame.pts should hold the frame's pts from the
 579         // original data stream or -1 if it didn't have one. in the latter case
 580         // we generate the next pts in sequence for it.
 581         double frame_dur = pv->duration;
 582         if ( frame_dur <= 0 )
 583         {
 584             frame_dur = 90000. * (double)pv->context->time_base.num /
 585                         (double)pv->context->time_base.den;
 586             pv->duration = frame_dur;
 587         }
 588         if ( frame.repeat_pict )
 589         {
 590             frame_dur += frame.repeat_pict * frame_dur * 0.5;
 591         }
 592         // If there was no pts for this frame, assume constant frame rate
 593         // video & estimate the next frame time from the last & duration.
 594         double pts = frame.pts;
 595         if ( pts < 0 )
 596         {
 597             pts = pv->pts_next;
 598         }
 599         pv->pts_next = pts + frame_dur;
 600
 601         hb_buffer_t *buf;
 602
 603         // if we're doing a scan or this content couldn't have been broken
 604         // by Microsoft we don't worry about timestamp reordering
 605         if ( ! pv->job || ! pv->brokenByMicrosoft )
 606         {
 607             buf = copy_frame( pv, &frame );
 608             buf->start = pts;
 609             hb_list_add( pv->list, buf );
 610             ++pv->nframes;
 611             return got_picture;
 612         }
 613
 614         // XXX This following probably addresses a libavcodec bug but I don't
 615         //     see an easy fix so we workaround it here.
 616         //
 617         // The M$ 'packed B-frames' atrocity results in decoded frames with
 618         // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
 619         // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
 620         // The frames are actually delivered in the right order but with
 621         // the wrong timestamp. To get the correct timestamp attached to
 622         // each frame we have a delay queue (longer than the max number of
 623         // b-frames) & a sorting heap for the timestamps. As each frame
 624         // comes out of the decoder the oldest frame in the queue is removed
 625         // and associated with the smallest timestamp. Then the new frame is
 626         // added to the queue & its timestamp is pushed on the heap.
 627         // This does nothing if the timestamps are correct (i.e., the video
 628         // uses a codec that Micro$oft hasn't broken yet) but the frames
 629         // get timestamped correctly even when M$ has munged them.
 630
 631         // remove the oldest picture from the frame queue (if any) &
 632         // give it the smallest timestamp from our heap. The queue size
 633         // is a power of two so we get the slot of the oldest by masking
 634         // the frame count & this will become the slot of the newest
 635         // once we've removed & processed the oldest.
 636         int slot = pv->nframes & (HEAP_SIZE-1);
 637         if ( ( buf = pv->delayq[slot] ) != NULL )
 638         {
 639             buf->start = heap_pop( &pv->pts_heap );
 640
 641             if ( pv->new_chap && buf->start >= pv->chap_time )
 642             {
 643                 buf->new_chap = pv->new_chap;
 644                 pv->new_chap = 0;
 645                 pv->chap_time = 0;
 646                 log_chapter( pv, buf->new_chap, buf->start );
 647             }
 648             else if ( pv->nframes == 0 )
 649             {
 650                 log_chapter( pv, pv->job->chapter_start, buf->start );
 651             }
 652             hb_list_add( pv->list, buf );
 653         }
 654
 655         // add the new frame to the delayq & push its timestamp on the heap
 656         pv->delayq[slot] = copy_frame( pv, &frame );
 657         heap_push( &pv->pts_heap, pts );
 658
 659         ++pv->nframes;
 660     }
 661
 662     return got_picture;
 663 }
 664
 665 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
 666                          int64_t pts, int64_t dts )
 667 {
 668     /*
 669      * The following loop is a do..while because we need to handle both
 670      * data & the flush at the end (signaled by size=0). At the end there's
 671      * generally a frame in the parser & one or more frames in the decoder
 672      * (depending on the bframes setting).
 673      */
 674     int pos = 0;
 675     do {
 676         uint8_t *pout;
 677         int pout_len;
 678         int len = av_parser_parse( pv->parser, pv->context, &pout, &pout_len,
 679                                    data + pos, size - pos, pts, dts );
 680         pos += len;
 681
 682         if ( pout_len > 0 )
 683         {
 684             pv->pts = pv->parser->pts;
 685             decodeFrame( pv, pout, pout_len );
 686         }
 687     } while ( pos < size );
 688
 689     /* the stuff above flushed the parser, now flush the decoder */
 690     if ( size <= 0 )
 691     {
 692         while ( decodeFrame( pv, NULL, 0 ) )
 693         {
 694         }
 695         flushDelayQueue( pv );
 696     }
 697 }
 698
 699 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 700 {
 701     hb_buffer_t *head = hb_list_item( pv->list, 0 );
 702
 703     if ( head )
 704     {
 705         hb_list_rem( pv->list, head );
 706
 707         hb_buffer_t *last = head, *buf;
 708
 709         while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
 710         {
 711             hb_list_rem( pv->list, buf );
 712             last->next = buf;
 713             last = buf;
 714         }
 715     }
 716     return head;
 717 }
 718
 719
 720 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
 721 {
 722
 723     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
 724     w->private_data = pv;
 725     pv->job   = job;
 726     pv->list = hb_list_init();
 727
 728     int codec_id = w->codec_param;
 729     pv->parser = av_parser_init( codec_id );
 730     pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
 731
 732     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
 733     pv->context->opaque = pv;
 734     pv->context->get_buffer = get_frame_buf;
 735
 736     return 0;
 737 }
 738
 739 static int next_hdr( hb_buffer_t *in, int offset )
 740 {
 741     uint8_t *dat = in->data;
 742     uint16_t last2 = 0xffff;
 743     for ( ; in->size - offset > 1; ++offset )
 744     {
 745         if ( last2 == 0 && dat[offset] == 0x01 )
 746             // found an mpeg start code
 747             return offset - 2;
 748
 749         last2 = ( last2 << 8 ) | dat[offset];
 750     }
 751
 752     return -1;
 753 }
 754
 755 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
 756 {
 757     if ( in->size - offset < 4 )
 758         // not enough room for an mpeg start code
 759         return -1;
 760
 761     for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
 762     {
 763         if ( in->data[offset+3] == hdr_type )
 764             // found it
 765             break;
 766     }
 767     return offset;
 768 }
 769
 770 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
 771 {
 772     hb_work_private_t *pv = w->private_data;
 773
 774     // we can't call the avstream funcs but the read_header func in the
 775     // AVInputFormat may set up some state in the AVContext. In particular
 776     // vc1t_read_header allocates 'extradata' to deal with header issues
 777     // related to Microsoft's bizarre engineering notions. We alloc a chunk
 778     // of space to make vc1 work then associate the codec with the context.
 779     if ( w->codec_param != CODEC_ID_VC1 )
 780     {
 781         // we haven't been inflicted with M$ - allocate a little space as
 782         // a marker and return success.
 783         pv->context->extradata_size = 16;
 784         pv->context->extradata = av_malloc(pv->context->extradata_size);
 785         return 0;
 786     }
 787
 788     // find the start and and of the sequence header
 789     int shdr, shdr_end;
 790     if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
 791     {
 792         // didn't find start of seq hdr
 793         return 1;
 794     }
 795     if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
 796     {
 797         shdr_end = in->size;
 798     }
 799     shdr_end -= shdr;
 800
 801     // find the start and and of the entry point header
 802     int ehdr, ehdr_end;
 803     if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
 804     {
 805         // didn't find start of entry point hdr
 806         return 1;
 807     }
 808     if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
 809     {
 810         ehdr_end = in->size;
 811     }
 812     ehdr_end -= ehdr;
 813
 814     // found both headers - allocate an extradata big enough to hold both
 815     // then copy them into it.
 816     pv->context->extradata_size = shdr_end + ehdr_end;
 817     pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
 818     memcpy( pv->context->extradata, in->data + shdr, shdr_end );
 819     memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
 820     memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
 821     return 0;
 822 }
 823
 824 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 825                             hb_buffer_t ** buf_out )
 826 {
 827     hb_work_private_t *pv = w->private_data;
 828     hb_buffer_t *in = *buf_in;
 829     int64_t pts = AV_NOPTS_VALUE;
 830     int64_t dts = pts;
 831
 832     *buf_in = NULL;
 833
 834     /* if we got an empty buffer signaling end-of-stream send it downstream */
 835     if ( in->size == 0 )
 836     {
 837         decodeVideo( pv, in->data, in->size, pts, dts );
 838         hb_list_add( pv->list, in );
 839         *buf_out = link_buf_list( pv );
 840         return HB_WORK_DONE;
 841     }
 842
 843     // if this is the first frame open the codec (we have to wait for the
 844     // first frame because of M$ VC1 braindamage).
 845     if ( pv->context->extradata_size == 0 )
 846     {
 847         if ( setup_extradata( w, in ) )
 848         {
 849             // we didn't find the headers needed to set up extradata.
 850             // the codec will abort if we open it so just free the buf
 851             // and hope we eventually get the info we need.
 852             hb_buffer_close( &in );
 853             return HB_WORK_OK;
 854         }
 855         AVCodec *codec = avcodec_find_decoder( w->codec_param );
 856         // There's a mis-feature in ffmpeg that causes the context to be
 857         // incorrectly initialized the 1st time avcodec_open is called.
 858         // If you close it and open a 2nd time, it finishes the job.
 859         hb_avcodec_open( pv->context, codec );
 860         hb_avcodec_close( pv->context );
 861         hb_avcodec_open( pv->context, codec );
 862     }
 863
 864     if( in->start >= 0 )
 865     {
 866         pts = in->start;
 867         dts = in->renderOffset;
 868     }
 869     if ( in->new_chap )
 870     {
 871         pv->new_chap = in->new_chap;
 872         pv->chap_time = pts >= 0? pts : pv->pts_next;
 873     }
 874     decodeVideo( pv, in->data, in->size, pts, dts );
 875     hb_buffer_close( &in );
 876     *buf_out = link_buf_list( pv );
 877     return HB_WORK_OK;
 878 }
 879
 880 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
 881 {
 882     hb_work_private_t *pv = w->private_data;
 883
 884     memset( info, 0, sizeof(*info) );
 885
 886     if ( pv && pv->context )
 887     {
 888         AVCodecContext *context = pv->context;
 889         info->bitrate = context->bit_rate;
 890         info->width = context->width;
 891         info->height = context->height;
 892
 893         /* ffmpeg gives the frame rate in frames per second while HB wants
 894          * it in units of the 27MHz MPEG clock. */
 895         info->rate = 27000000;
 896         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
 897                           context->time_base.den;
 898         if ( context->ticks_per_frame > 1 )
 899         {
 900             // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
 901             // field rate rather than frame rate so convert back to frames.
 902             info->rate_base *= context->ticks_per_frame;
 903         }
 904
 905         /* Sometimes there's no pixel aspect set in the source. In that case,
 906            assume a 1:1 PAR. Otherwise, preserve the source PAR.             */
 907         info->pixel_aspect_width = context->sample_aspect_ratio.num ?
 908                                         context->sample_aspect_ratio.num : 1;
 909         info->pixel_aspect_height = context->sample_aspect_ratio.den ?
 910                                         context->sample_aspect_ratio.den : 1;
 911
 912         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
 913          * Display Aspect Ratio so we convert by scaling by the Storage
 914          * Aspect Ratio (w/h). We do the calc in floating point to get the
 915          * rounding right. */
 916         info->aspect = (double)info->pixel_aspect_width *
 917                        (double)context->width /
 918                        (double)info->pixel_aspect_height /
 919                        (double)context->height;
 920
 921         info->profile = context->profile;
 922         info->level = context->level;
 923         info->name = context->codec->name;
 924         return 1;
 925     }
 926     return 0;
 927 }
 928
 929 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 930                              hb_work_info_t *info )
 931 {
 932     return 0;
 933 }
 934
// Work object for the stand-alone libavcodec video decoder.
// The initializer is positional; presumably the slots are id, description,
// init, work, close, info and bitstream-info in that order - confirm
// against the declaration of hb_work_object_t.
// Note that the close routine (decavcodecClose) is not video specific.
hb_work_object_t hb_decavcodecv =
{
    WORK_DECAVCODECV,
    "Video decoder (libavcodec)",
    decavcodecvInit,
    decavcodecvWork,
    decavcodecClose,
    decavcodecvInfo,
    decavcodecvBSInfo
};
 945
 946
 947 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
 948 // includes a parser and passes information from the parser to the decoder
 949 // via a codec context kept in the AVStream of the reader's AVFormatContext.
 950 // We *have* to use that codec context to decode the stream or we'll get
 951 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
 952 // codec context in our codec_param.
 953
 954 // this routine gets the appropriate context pointer from the ffmpeg
 955 // stream reader. it can't be called until we get the first buffer because
 956 // we can't guarantee that reader will be called before the our init
 957 // routine and if our init is called first we'll get a pointer to the
 958 // old scan stream (which has already been closed).
 959 static void init_ffmpeg_context( hb_work_object_t *w )
 960 {
 961     hb_work_private_t *pv = w->private_data;
 962     pv->context = hb_ffmpeg_context( w->codec_param );
 963
 964     // during scan the decoder gets closed & reopened which will
 965     // close the codec so reopen it if it's not there
 966     if ( ! pv->context->codec )
 967     {
 968         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
 969         hb_avcodec_open( pv->context, codec );
 970     }
 971     // set up our best guess at the frame duration.
 972     // the frame rate in the codec is usually bogus but it's sometimes
 973     // ok in the stream.
 974     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 975
 976     if ( st->nb_frames && st->duration )
 977     {
 978         // compute the average frame duration from the total number
 979         // of frames & the total duration.
 980         pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
 981                        ( (double)st->nb_frames * (double)st->time_base.den );
 982     }
 983     else
 984     {
 985         // XXX We don't have a frame count or duration so try to use the
 986         // far less reliable time base info in the stream.
 987         // Because the time bases are so screwed up, we only take values
 988         // in the range 8fps - 64fps.
 989         AVRational tb;
 990         if ( st->time_base.num * 64 > st->time_base.den &&
 991              st->time_base.den > st->time_base.num * 8 )
 992         {
 993             tb = st->time_base;
 994         }
 995         else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
 996                   st->r_frame_rate.num > st->r_frame_rate.den * 8 )
 997         {
 998             tb.num = st->r_frame_rate.den;
 999             tb.den = st->r_frame_rate.num;
1000         }
1001         else
1002         {
1003             tb.num = 1001;  /*XXX*/
1004             tb.den = 24000; /*XXX*/
1005         }
1006         pv->duration =  (double)tb.num / (double)tb.den;
1007     }
1008     pv->duration *= 90000.;
1009
1010     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
1011     pv->context->opaque = pv;
1012     pv->context->get_buffer = get_frame_buf;
1013
1014     // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1015     // b-frames abortion that messes up frame ordering and timestamps.
1016     // XXX ffmpeg knows which streams are broken but doesn't expose the
1017     //     info externally. We should patch ffmpeg to add a flag to the
1018     //     codec context for this but until then we mark all ffmpeg streams
1019     //     as suspicious.
1020     pv->brokenByMicrosoft = 1;
1021 }
1022
1023 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1024 {
1025     // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1026     // will seg fault in odd, data dependent ways if it's not there. (my guess
1027     // is this is a case of a local performance optimization creating a global
1028     // performance degradation since all the time wasted by extraneous data
1029     // copies & memory zeroing has to be huge compared to the minor reduction
1030     // in inner-loop instructions this affords - modern cpus bottleneck on
1031     // memory bandwidth not instruction bandwidth).
1032     if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1033     {
1034         // have to realloc to add the padding
1035         hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
1036     }
1037     memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
1038 }
1039
1040 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
1041 {
1042
1043     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1044     w->private_data = pv;
1045     pv->job   = job;
1046     pv->list = hb_list_init();
1047     pv->pts_next = -1;
1048     pv->pts = -1;
1049     return 0;
1050 }
1051
1052 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1053                              hb_buffer_t ** buf_out )
1054 {
1055     hb_work_private_t *pv = w->private_data;
1056     if ( ! pv->context )
1057     {
1058         init_ffmpeg_context( w );
1059     }
1060     hb_buffer_t *in = *buf_in;
1061     *buf_in = NULL;
1062
1063     /* if we got an empty buffer signaling end-of-stream send it downstream */
1064     if ( in->size == 0 )
1065     {
1066         /* flush any frames left in the decoder */
1067         while ( decodeFrame( pv, NULL, 0 ) )
1068         {
1069         }
1070         flushDelayQueue( pv );
1071         hb_list_add( pv->list, in );
1072         *buf_out = link_buf_list( pv );
1073         return HB_WORK_DONE;
1074     }
1075
1076     int64_t pts = in->start;
1077     if( pts >= 0 )
1078     {
1079         // use the first timestamp as our 'next expected' pts
1080         if ( pv->pts_next < 0 )
1081         {
1082             pv->pts_next = pts;
1083         }
1084         pv->pts = pts;
1085     }
1086
1087     if ( in->new_chap )
1088     {
1089         pv->new_chap = in->new_chap;
1090         pv->chap_time = pts >= 0? pts : pv->pts_next;
1091     }
1092     prepare_ffmpeg_buffer( in );
1093     decodeFrame( pv, in->data, in->size );
1094     hb_buffer_close( &in );
1095     *buf_out = link_buf_list( pv );
1096     return HB_WORK_OK;
1097 }
1098
1099 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1100 {
1101     if ( decavcodecvInfo( w, info ) )
1102     {
1103         hb_work_private_t *pv = w->private_data;
1104         if ( ! pv->context )
1105         {
1106             init_ffmpeg_context( w );
1107         }
1108         // we have the frame duration in units of the 90KHz pts clock but
1109         // need it in units of the 27MHz MPEG clock. */
1110         info->rate = 27000000;
1111         info->rate_base = pv->duration * 300.;
1112         return 1;
1113     }
1114     return 0;
1115 }
1116
1117 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
1118 {
1119     AVCodecContext *context = pv->context;
1120     int pos = 0;
1121
1122     while ( pos < size )
1123     {
1124         int16_t *buffer = pv->buffer;
1125         if ( buffer == NULL )
1126         {
1127             // XXX ffmpeg bug workaround
1128             // malloc a buffer for the audio decode. On an x86, ffmpeg
1129             // uses mmx/sse instructions on this buffer without checking
1130             // that it's 16 byte aligned and this will cause an abort if
1131             // the buffer is allocated on our stack. Rather than doing
1132             // complicated, machine dependent alignment here we use the
1133             // fact that malloc returns an aligned pointer on most architectures.
1134
1135             #if defined( SYS_CYGWIN )
1136                 // Cygwin's malloc doesn't appear to return 16-byte aligned memory so use memalign instead.
1137                pv->buffer = memalign(16, AVCODEC_MAX_AUDIO_FRAME_SIZE);
1138             #else
1139                 pv->buffer = malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1140             #endif
1141
1142             buffer = pv->buffer;
1143         }
1144         int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1145         int len = avcodec_decode_audio2( context, buffer, &out_size,
1146                                          data + pos, size - pos );
1147         if ( len <= 0 )
1148         {
1149             return;
1150         }
1151         pos += len;
1152         if( out_size > 0 )
1153         {
1154             // We require signed 16-bit ints for the output format. If
1155             // we got something different convert it.
1156             if ( context->sample_fmt != SAMPLE_FMT_S16 )
1157             {
1158                 // Note: av_audio_convert seems to be a work-in-progress but
1159                 //       looks like it will eventually handle general audio
1160                 //       mixdowns which would allow us much more flexibility
1161                 //       in handling multichannel audio in HB. If we were doing
1162                 //       anything more complicated than a one-for-one format
1163                 //       conversion we'd probably want to cache the converter
1164                 //       context in the pv.
1165                 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1166                 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1167                                                               context->sample_fmt, 1,
1168                                                               NULL, 0 );
1169                 // get output buffer size (in 2-byte samples) then malloc a buffer
1170                 out_size = ( out_size * 2 ) / isamp;
1171                 buffer = malloc( out_size );
1172
1173                 // we're doing straight sample format conversion which behaves as if
1174                 // there were only one channel.
1175                 const void * const ibuf[6] = { pv->buffer };
1176                 void * const obuf[6] = { buffer };
1177                 const int istride[6] = { isamp };
1178                 const int ostride[6] = { 2 };
1179
1180                 av_audio_convert( ctx, obuf, ostride, ibuf, istride, out_size >> 1 );
1181                 av_audio_convert_free( ctx );
1182             }
1183             hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
1184
1185             // convert from bytes to total samples
1186             out_size >>= 1;
1187
1188             double pts = pv->pts_next;
1189             buf->start = pts;
1190             pts += out_size * pv->duration;
1191             buf->stop  = pts;
1192             pv->pts_next = pts;
1193
1194             float *fl32 = (float *)buf->data;
1195             int i;
1196             for( i = 0; i < out_size; ++i )
1197             {
1198                 fl32[i] = buffer[i];
1199             }
1200             hb_list_add( pv->list, buf );
1201
1202             // if we allocated a buffer for sample format conversion, free it
1203             if ( buffer != pv->buffer )
1204             {
1205                 free( buffer );
1206             }
1207         }
1208     }
1209 }
1210
1211 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1212                     hb_buffer_t **buf_out )
1213 {
1214     if ( (*buf_in)->size <= 0 )
1215     {
1216         /* EOF on input stream - send it downstream & say that we're done */
1217         *buf_out = *buf_in;
1218         *buf_in = NULL;
1219         return HB_WORK_DONE;
1220     }
1221
1222     hb_work_private_t *pv = w->private_data;
1223
1224     if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1225     {
1226         // discard buffers that start before video time 0
1227         *buf_out = NULL;
1228         return HB_WORK_OK;
1229     }
1230
1231     if ( ! pv->context )
1232     {
1233         init_ffmpeg_context( w );
1234         // duration is a scaling factor to go from #bytes in the decoded
1235         // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1236         // total samples to per-channel samples. 'sample_rate' converts
1237         // per-channel samples to seconds per sample and the 90000
1238         // is mpeg ticks per second.
1239         pv->duration = 90000. /
1240                     (double)( pv->context->sample_rate * pv->context->channels );
1241     }
1242     hb_buffer_t *in = *buf_in;
1243
1244     // if the packet has a timestamp use it if we don't have a timestamp yet
1245     // or if there's been a timing discontinuity of more than 100ms.
1246     if ( in->start >= 0 &&
1247          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1248     {
1249         pv->pts_next = in->start;
1250     }
1251     prepare_ffmpeg_buffer( in );
1252     decodeAudio( pv, in->data, in->size );
1253     *buf_out = link_buf_list( pv );
1254
1255     return HB_WORK_OK;
1256 }
1257
// Work object for the "internal" video decoder that decodes with the
// codec context owned by the ffmpeg stream reader.
// The initializer is positional; presumably the slots are id, description,
// init, work, close, info and bitstream-info in that order - confirm
// against the declaration of hb_work_object_t.
// Close and bitstream-info are shared with the other decoders in this file.
hb_work_object_t hb_decavcodecvi =
{
    WORK_DECAVCODECVI,
    "Video decoder (ffmpeg streams)",
    decavcodecviInit,
    decavcodecviWork,
    decavcodecClose,
    decavcodecviInfo,
    decavcodecvBSInfo
};
1268
// Work object for the "internal" audio decoder that decodes with the
// codec context owned by the ffmpeg stream reader.
// The initializer is positional; presumably the slots are id, description,
// init, work, close, info and bitstream-info in that order - confirm
// against the declaration of hb_work_object_t.
// Note the reuse: init is the same routine the internal video decoder
// uses (decavcodecviInit), while info and bitstream-info are
// decavcodecInfo / decavcodecBSInfo (no 'v' or 'i' in the names).
hb_work_object_t hb_decavcodecai =
{
    WORK_DECAVCODECAI,
    "Audio decoder (ffmpeg streams)",
    decavcodecviInit,
    decavcodecaiWork,
    decavcodecClose,
    decavcodecInfo,
    decavcodecBSInfo
};