libhb/decavcodec.c

   1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 /* This module is Handbrake's interface to the ffmpeg decoder library
   8    (libavcodec & small parts of libavformat). It contains four Handbrake
   9    "work objects":
  10
  11     decavcodec  connects HB to an ffmpeg audio decoder
  12     decavcodecv connects HB to an ffmpeg video decoder
  13
  14         (Two different routines are needed because the ffmpeg library
  15         has different decoder calling conventions for audio & video.
  16         The audio decoder should have had its name changed to "decavcodeca"
  17         but I got lazy.) These work objects are self-contained & follow all
  18         of HB's conventions for a decoder module. They can be used like
  19         any other HB decoder (deca52, decmpeg2, etc.).
  20
  21     decavcodecai "internal" (incestuous?) version of decavcodec
  22     decavcodecvi "internal" (incestuous?) version of decavcodecv
  23
  24         These routines are functionally equivalent to the routines above but
  25         can only be used by the ffmpeg-based stream reader in libhb/stream.c.
  26         The reason they exist is because the ffmpeg library leaves some of
  27         the information needed by the decoder in the AVStream (the data
  28         structure used by the stream reader) and we need to retrieve it
  29         to successfully decode frames. But in HB the reader and decoder
  30         modules are in completely separate threads and nothing goes between
  31         them but hb_buffers containing frames to be decoded. I.e., there's
  32         no easy way for the ffmpeg stream reader to pass a pointer to its
  33         AVStream over to the ffmpeg video or audio decoder. So the *i work
  34         objects use a private back door to the stream reader to get access
  35         to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
  36         and the codec_param passed to these work objects is the key to this
  37         back door (it's basically an index that allows the correct AVStream
  38         to be retrieved).
  39
  40     The normal & *i objects share a lot of code (the basic frame decoding
  41     and bitstream info code is factored out into subroutines that can be
  42     called by either) but the top level routines of the *i objects
  43     (decavcodecviWork, decavcodecviInfo, etc.) are different because:
  44      1) they *have* to use the AVCodecContext that's contained in the
  45         reader's AVStream rather than just allocating & using their own,
  46      2) the Info routines have access to stuff kept in the AVStream in addition
  47         to stuff kept in the AVCodecContext. This shouldn't be necessary but
  48         crucial information like video frame rate that should be in the
  49         AVCodecContext is either missing or wrong in the version of ffmpeg
  50         we're currently using.
  51
  52     A consequence of the above is that the non-i work objects *can't* use
  53     information from the AVStream because there isn't one - they get their
  54     data from either the dvd reader or the mpeg reader, not the ffmpeg stream
  55     reader. That means that they have to make up for deficiencies in the
  56     AVCodecContext info by using stuff kept in the HB "title" struct. It
  57     also means that ffmpeg codecs that randomly scatter state needed by
  58     the decoder across both the AVCodecContext & the AVStream (e.g., the
  59     VC1 decoder) can't easily be used by the HB mpeg stream reader.
  60  */
  61
  62 #include "hb.h"
  63 #include "hbffmpeg.h"
  64 #include "libavcodec/audioconvert.h"
  65
/* Forward declarations of the audio decoder entry points defined later in
   this file; they are needed by the work object initializer below. */
static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
static void decavcodecClose( hb_work_object_t * );
static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );

/* Work object for the ffmpeg audio decoder. The initializer is positional:
   a work object id, a display name, then the init / work / close / info /
   bitstream-info callbacks (presumably matching the field order of
   hb_work_object_t - TODO confirm against hb.h). */
hb_work_object_t hb_decavcodec =
{
    WORK_DECAVCODEC,
    "MPGA decoder (libavcodec)",
    decavcodecInit,
    decavcodecWork,
    decavcodecClose,
    decavcodecInfo,
    decavcodecBSInfo
};
  82
// Capacity of the timestamp heap and of the frame delay queue. The delay
// queue indexes its slots with "& (HEAP_SIZE-1)", so this must stay a
// power of two.
#define HEAP_SIZE 8
// Small fixed-size min-heap of timestamps (see heap_push / heap_pop).
typedef struct {
    // there are nheap items on the heap indexed 1..nheap (i.e., top of
    // heap is 1). The 0th slot is unused - a marker is put there to check
    // for overwrite errs.
    // NOTE(review): no code in the visible part of this file writes or
    // checks a marker in h[0] - confirm whether that check still exists.
    int64_t h[HEAP_SIZE+1];
    int     nheap;      // number of timestamps currently stored
} pts_heap_t;
  91
// Per-instance decoder state, shared by the audio and video work objects
// in this file.
struct hb_work_private_s
{
    hb_job_t        *job;       // job passed to the Init routine; copy_frame &
                                // decodeFrame treat NULL as "no job" (scan)
    AVCodecContext  *context;   // libavcodec decoder context
    AVCodecParserContext *parser; // bitstream parser that splits input into frames
    hb_list_t       *list;      // (video) decoded buffers waiting for link_buf_list
    double          duration;   // frame duration (for video)
    double          pts_next;   // next pts we expect to generate
    int64_t         pts;        // (video) pts passing from parser to decoder
    int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
    int             new_chap;   // output chapter mark pending
    uint32_t        nframes;    // count of decoded video frames
    uint32_t        ndrops;     // reported when the decoder closes; not updated
                                // in the visible part of this file
    uint32_t        decode_errors; // count of failed decode calls
    int             brokenByMicrosoft; // video stream may contain packed b-frames
    hb_buffer_t*    delayq[HEAP_SIZE]; // ring of frames awaiting a corrected timestamp
    pts_heap_t      pts_heap;   // timestamps of the frames held in delayq
    void*           buffer;     // freed by decavcodecClose; allocated elsewhere
    struct SwsContext *sws_context; // if we have to rescale or convert color space
};
 112
 113 static int64_t heap_pop( pts_heap_t *heap )
 114 {
 115     int64_t result;
 116
 117     if ( heap->nheap <= 0 )
 118     {
 119         return -1;
 120     }
 121
 122     // return the top of the heap then put the bottom element on top,
 123     // decrease the heap size by one & rebalence the heap.
 124     result = heap->h[1];
 125
 126     int64_t v = heap->h[heap->nheap--];
 127     int parent = 1;
 128     int child = parent << 1;
 129     while ( child <= heap->nheap )
 130     {
 131         // find the smallest of the two children of parent
 132         if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
 133             ++child;
 134
 135         if (v <= heap->h[child])
 136             // new item is smaller than either child so it's the new parent.
 137             break;
 138
 139         // smallest child is smaller than new item so move it up then
 140         // check its children.
 141         int64_t hp = heap->h[child];
 142         heap->h[parent] = hp;
 143         parent = child;
 144         child = parent << 1;
 145     }
 146     heap->h[parent] = v;
 147     return result;
 148 }
 149
 150 static void heap_push( pts_heap_t *heap, int64_t v )
 151 {
 152     if ( heap->nheap < HEAP_SIZE )
 153     {
 154         ++heap->nheap;
 155     }
 156
 157     // stick the new value on the bottom of the heap then bubble it
 158     // up to its correct spot.
 159         int child = heap->nheap;
 160         while (child > 1) {
 161                 int parent = child >> 1;
 162                 if (heap->h[parent] <= v)
 163                         break;
 164                 // move parent down
 165                 int64_t hp = heap->h[parent];
 166                 heap->h[child] = hp;
 167                 child = parent;
 168         }
 169         heap->h[child] = v;
 170 }
 171
 172
 173 /***********************************************************************
 174  * hb_work_decavcodec_init
 175  ***********************************************************************
 176  *
 177  **********************************************************************/
 178 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 179 {
 180     AVCodec * codec;
 181
 182     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 183     w->private_data = pv;
 184
 185     pv->job   = job;
 186
 187     int codec_id = w->codec_param;
 188     /*XXX*/
 189     if ( codec_id == 0 )
 190         codec_id = CODEC_ID_MP2;
 191
 192     codec = avcodec_find_decoder( codec_id );
 193     pv->parser = av_parser_init( codec_id );
 194
 195     pv->context = avcodec_alloc_context();
 196     hb_avcodec_open( pv->context, codec );
 197
 198     return 0;
 199 }
 200
 201 /***********************************************************************
 202  * Close
 203  ***********************************************************************
 204  *
 205  **********************************************************************/
 206 static void decavcodecClose( hb_work_object_t * w )
 207 {
 208     hb_work_private_t * pv = w->private_data;
 209
 210     if ( pv )
 211     {
 212         if ( pv->job && pv->context && pv->context->codec )
 213         {
 214             hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
 215                     pv->context->codec->name, pv->nframes, pv->decode_errors,
 216                     pv->ndrops );
 217         }
 218         if ( pv->sws_context )
 219         {
 220             sws_freeContext( pv->sws_context );
 221         }
 222         if ( pv->parser )
 223         {
 224             av_parser_close(pv->parser);
 225         }
 226         if ( pv->context && pv->context->codec )
 227         {
 228             hb_avcodec_close( pv->context );
 229         }
 230         if ( pv->list )
 231         {
 232             hb_list_close( &pv->list );
 233         }
 234         if ( pv->buffer )
 235         {
 236             free( pv->buffer );
 237             pv->buffer = NULL;
 238         }
 239         free( pv );
 240         w->private_data = NULL;
 241     }
 242 }
 243
 244 /***********************************************************************
 245  * Work
 246  ***********************************************************************
 247  *
 248  **********************************************************************/
 249 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 250                     hb_buffer_t ** buf_out )
 251 {
 252     hb_work_private_t * pv = w->private_data;
 253     hb_buffer_t * in = *buf_in, * buf, * last = NULL;
 254     int   pos, len, out_size, i, uncompressed_len;
 255     short buffer[AVCODEC_MAX_AUDIO_FRAME_SIZE];
 256     uint64_t cur;
 257     unsigned char *parser_output_buffer;
 258     int parser_output_buffer_len;
 259
 260     if ( (*buf_in)->size <= 0 )
 261     {
 262         /* EOF on input stream - send it downstream & say that we're done */
 263         *buf_out = *buf_in;
 264         *buf_in = NULL;
 265         return HB_WORK_DONE;
 266     }
 267
 268     *buf_out = NULL;
 269
 270     if ( in->start < -1 && pv->pts_next <= 0 )
 271     {
 272         // discard buffers that start before video time 0
 273         return HB_WORK_OK;
 274     }
 275
 276     cur = ( in->start < 0 )? pv->pts_next : in->start;
 277
 278     pos = 0;
 279     while( pos < in->size )
 280     {
 281         len = av_parser_parse( pv->parser, pv->context,
 282                                &parser_output_buffer, &parser_output_buffer_len,
 283                                in->data + pos, in->size - pos, cur, cur );
 284         out_size = 0;
 285         uncompressed_len = 0;
 286         if (parser_output_buffer_len)
 287         {
 288             out_size = sizeof(buffer);
 289             uncompressed_len = avcodec_decode_audio2( pv->context, buffer,
 290                                                       &out_size,
 291                                                       parser_output_buffer,
 292                                                       parser_output_buffer_len );
 293         }
 294         if( out_size )
 295         {
 296             short * s16;
 297             float * fl32;
 298
 299             buf = hb_buffer_init( 2 * out_size );
 300
 301             int sample_size_in_bytes = 2;   // Default to 2 bytes
 302             switch (pv->context->sample_fmt)
 303             {
 304               case SAMPLE_FMT_S16:
 305                 sample_size_in_bytes = 2;
 306                 break;
 307               /* We should handle other formats here - but that needs additional format conversion work below */
 308               /* For now we'll just report the error and try to carry on */
 309               default:
 310                 hb_log("decavcodecWork - Unknown Sample Format from avcodec_decode_audio (%d) !", pv->context->sample_fmt);
 311                 break;
 312             }
 313
 314             buf->start = cur;
 315             buf->stop  = cur + 90000 * ( out_size / (sample_size_in_bytes * pv->context->channels) ) /
 316                          pv->context->sample_rate;
 317             cur = buf->stop;
 318
 319             s16  = buffer;
 320             fl32 = (float *) buf->data;
 321             for( i = 0; i < out_size / 2; i++ )
 322             {
 323                 fl32[i] = s16[i];
 324             }
 325
 326             if( last )
 327             {
 328                 last = last->next = buf;
 329             }
 330             else
 331             {
 332                 *buf_out = last = buf;
 333             }
 334         }
 335
 336         pos += len;
 337     }
 338
 339     pv->pts_next = cur;
 340
 341     return HB_WORK_OK;
 342 }
 343
 344 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
 345 {
 346     hb_work_private_t *pv = w->private_data;
 347
 348     memset( info, 0, sizeof(*info) );
 349
 350     if ( pv && pv->context )
 351     {
 352         AVCodecContext *context = pv->context;
 353         info->bitrate = context->bit_rate;
 354         info->rate = context->time_base.num;
 355         info->rate_base = context->time_base.den;
 356         info->profile = context->profile;
 357         info->level = context->level;
 358         return 1;
 359     }
 360     return 0;
 361 }
 362
 363 static const int chan2layout[] = {
 364     HB_INPUT_CH_LAYOUT_MONO,  // We should allow no audio really.
 365     HB_INPUT_CH_LAYOUT_MONO,
 366     HB_INPUT_CH_LAYOUT_STEREO,
 367     HB_INPUT_CH_LAYOUT_2F1R,
 368     HB_INPUT_CH_LAYOUT_2F2R,
 369     HB_INPUT_CH_LAYOUT_3F2R,
 370     HB_INPUT_CH_LAYOUT_4F2R,
 371     HB_INPUT_CH_LAYOUT_STEREO,
 372     HB_INPUT_CH_LAYOUT_STEREO,
 373 };
 374
 375 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 376                              hb_work_info_t *info )
 377 {
 378     hb_work_private_t *pv = w->private_data;
 379     int ret = 0;
 380
 381     memset( info, 0, sizeof(*info) );
 382
 383     if ( pv && pv->context )
 384     {
 385         return decavcodecInfo( w, info );
 386     }
 387     // XXX
 388     // We should parse the bitstream to find its parameters but for right
 389     // now we just return dummy values if there's a codec that will handle it.
 390     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
 391                                                            CODEC_ID_MP2 );
 392     if ( ! codec )
 393     {
 394         // there's no ffmpeg codec for this audio type - give up
 395         return -1;
 396     }
 397
 398     static char codec_name[64];
 399     info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
 400
 401     AVCodecParserContext *parser = av_parser_init( codec->id );
 402     AVCodecContext *context = avcodec_alloc_context();
 403     hb_avcodec_open( context, codec );
 404 #ifdef SYS_CYGWIN
 405     uint8_t *buffer = memalign(16, AVCODEC_MAX_AUDIO_FRAME_SIZE);
 406 #else
 407     uint8_t *buffer = malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 408 #endif
 409     int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 410     unsigned char *pbuffer;
 411     int pos = 0, pbuffer_size;
 412
 413     while ( pos < buf->size )
 414     {
 415         int len = av_parser_parse( parser, context, &pbuffer, &pbuffer_size,
 416                                    buf->data + pos, buf->size - pos,
 417                                    buf->start, buf->start );
 418         pos += len;
 419         if ( pbuffer_size > 0 )
 420         {
 421             len = avcodec_decode_audio2( context, (int16_t*)buffer, &out_size,
 422                                          pbuffer, pbuffer_size );
 423             if ( len > 0 && context->sample_rate > 0 )
 424             {
 425                 info->bitrate = context->bit_rate;
 426                 info->rate = context->sample_rate;
 427                 info->rate_base = 1;
 428                 info->channel_layout = chan2layout[context->channels & 7];
 429                 ret = 1;
 430                 break;
 431             }
 432         }
 433     }
 434     free( buffer );
 435     av_parser_close( parser );
 436     hb_avcodec_close( context );
 437     return ret;
 438 }
 439
 440 /* -------------------------------------------------------------
 441  * General purpose video decoder using libavcodec
 442  */
 443
 444 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
 445                             int h )
 446 {
 447     if ( dstride == sstride )
 448     {
 449         memcpy( dst, src, dstride * h );
 450         return dst + dstride * h;
 451     }
 452     int lbytes = dstride <= sstride? dstride : sstride;
 453     while ( --h >= 0 )
 454     {
 455         memcpy( dst, src, lbytes );
 456         src += sstride;
 457         dst += dstride;
 458     }
 459     return dst;
 460 }
 461
 462 // copy one video frame into an HB buf. If the frame isn't in our color space
 463 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
 464 // Otherwise just copy the bits.
 465 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 466 {
 467     AVCodecContext *context = pv->context;
 468     int w, h;
 469     if ( ! pv->job )
 470     {
 471         // if the dimensions are odd, drop the lsb since h264 requires that
 472         // both width and height be even.
 473         w = ( context->width >> 1 ) << 1;
 474         h = ( context->height >> 1 ) << 1;
 475     }
 476     else
 477     {
 478         w =  pv->job->title->width;
 479         h =  pv->job->title->height;
 480     }
 481     hb_buffer_t *buf = hb_video_buffer_init( w, h );
 482     uint8_t *dst = buf->data;
 483
 484     if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
 485          h != context->height )
 486     {
 487         // have to convert to our internal color space and/or rescale
 488         AVPicture dstpic;
 489         avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 490
 491         if ( ! pv->sws_context )
 492         {
 493             pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
 494                                               w, h, PIX_FMT_YUV420P,
 495                                               SWS_LANCZOS|SWS_ACCURATE_RND,
 496                                               NULL, NULL, NULL );
 497         }
 498         sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
 499                    dstpic.data, dstpic.linesize );
 500     }
 501     else
 502     {
 503         dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
 504         w = (w + 1) >> 1; h = (h + 1) >> 1;
 505         dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
 506         dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
 507     }
 508     return buf;
 509 }
 510
 511 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
 512 {
 513     hb_work_private_t *pv = context->opaque;
 514     frame->pts = pv->pts;
 515     pv->pts = -1;
 516     return avcodec_default_get_buffer( context, frame );
 517 }
 518
 519 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 520 {
 521     hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
 522     if ( c && c->title )
 523     {
 524         hb_log( "%s: \"%s\" (%d) at frame %u time %lld",
 525                 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
 526     }
 527     else
 528     {
 529         hb_log( "%s: Chapter %d at frame %u time %lld",
 530                 pv->context->codec->name, chap_num, pv->nframes, pts );
 531     }
 532 }
 533
 534 static void flushDelayQueue( hb_work_private_t *pv )
 535 {
 536     hb_buffer_t *buf;
 537     int slot = pv->nframes & (HEAP_SIZE-1);
 538
 539     // flush all the video packets left on our timestamp-reordering delay q
 540     while ( ( buf = pv->delayq[slot] ) != NULL )
 541     {
 542         buf->start = heap_pop( &pv->pts_heap );
 543         hb_list_add( pv->list, buf );
 544         pv->delayq[slot] = NULL;
 545         slot = ( slot + 1 ) & (HEAP_SIZE-1);
 546     }
 547 }
 548
 549 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
 550 {
 551     int got_picture, oldlevel = 0;
 552     AVFrame frame;
 553
 554     if ( global_verbosity_level <= 1 )
 555     {
 556         oldlevel = av_log_get_level();
 557         av_log_set_level( AV_LOG_QUIET );
 558     }
 559     if ( avcodec_decode_video( pv->context, &frame, &got_picture, data, size ) < 0 )
 560     {
 561         ++pv->decode_errors;
 562     }
 563     if ( global_verbosity_level <= 1 )
 564     {
 565         av_log_set_level( oldlevel );
 566     }
 567     if( got_picture )
 568     {
 569         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
 570         // packet had a pts we handed it to av_parser_parse (if the packet had
 571         // no pts we set it to -1 but before the parse we can't distinguish between
 572         // the start of a video frame with no pts & an intermediate packet of
 573         // some frame which never has a pts). we hope that when parse returns
 574         // the frame to us the pts we originally handed it will be in parser->pts.
 575         // we put this pts into pv->pts so that when a avcodec_decode_video
 576         // finally gets around to allocating an AVFrame to hold the decoded
 577         // frame we can stuff that pts into the frame. if all of these relays
 578         // worked at this point frame.pts should hold the frame's pts from the
 579         // original data stream or -1 if it didn't have one. in the latter case
 580         // we generate the next pts in sequence for it.
 581         double frame_dur = pv->duration;
 582         if ( frame_dur <= 0 )
 583         {
 584             frame_dur = 90000. * (double)pv->context->time_base.num /
 585                         (double)pv->context->time_base.den;
 586             pv->duration = frame_dur;
 587         }
 588         if ( frame.repeat_pict )
 589         {
 590             frame_dur += frame.repeat_pict * frame_dur * 0.5;
 591         }
 592         // If there was no pts for this frame, assume constant frame rate
 593         // video & estimate the next frame time from the last & duration.
 594         double pts = frame.pts;
 595         if ( pts < 0 )
 596         {
 597             pts = pv->pts_next;
 598         }
 599         pv->pts_next = pts + frame_dur;
 600
 601         hb_buffer_t *buf;
 602
 603         // if we're doing a scan or this content couldn't have been broken
 604         // by Microsoft we don't worry about timestamp reordering
 605         if ( ! pv->job || ! pv->brokenByMicrosoft )
 606         {
 607             buf = copy_frame( pv, &frame );
 608             buf->start = pts;
 609             hb_list_add( pv->list, buf );
 610             ++pv->nframes;
 611             return got_picture;
 612         }
 613
 614         // XXX This following probably addresses a libavcodec bug but I don't
 615         //     see an easy fix so we workaround it here.
 616         //
 617         // The M$ 'packed B-frames' atrocity results in decoded frames with
 618         // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
 619         // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
 620         // The frames are actually delivered in the right order but with
 621         // the wrong timestamp. To get the correct timestamp attached to
 622         // each frame we have a delay queue (longer than the max number of
 623         // b-frames) & a sorting heap for the timestamps. As each frame
 624         // comes out of the decoder the oldest frame in the queue is removed
 625         // and associated with the smallest timestamp. Then the new frame is
 626         // added to the queue & its timestamp is pushed on the heap.
 627         // This does nothing if the timestamps are correct (i.e., the video
 628         // uses a codec that Micro$oft hasn't broken yet) but the frames
 629         // get timestamped correctly even when M$ has munged them.
 630
 631         // remove the oldest picture from the frame queue (if any) &
 632         // give it the smallest timestamp from our heap. The queue size
 633         // is a power of two so we get the slot of the oldest by masking
 634         // the frame count & this will become the slot of the newest
 635         // once we've removed & processed the oldest.
 636         int slot = pv->nframes & (HEAP_SIZE-1);
 637         if ( ( buf = pv->delayq[slot] ) != NULL )
 638         {
 639             buf->start = heap_pop( &pv->pts_heap );
 640
 641             if ( pv->new_chap && buf->start >= pv->chap_time )
 642             {
 643                 buf->new_chap = pv->new_chap;
 644                 pv->new_chap = 0;
 645                 pv->chap_time = 0;
 646                 log_chapter( pv, buf->new_chap, buf->start );
 647             }
 648             else if ( pv->nframes == 0 )
 649             {
 650                 log_chapter( pv, pv->job->chapter_start, buf->start );
 651             }
 652             hb_list_add( pv->list, buf );
 653         }
 654
 655         // add the new frame to the delayq & push its timestamp on the heap
 656         pv->delayq[slot] = copy_frame( pv, &frame );
 657         heap_push( &pv->pts_heap, pts );
 658
 659         ++pv->nframes;
 660     }
 661
 662     return got_picture;
 663 }
 664
 665 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
 666                          int64_t pts, int64_t dts )
 667 {
 668     /*
 669      * The following loop is a do..while because we need to handle both
 670      * data & the flush at the end (signaled by size=0). At the end there's
 671      * generally a frame in the parser & one or more frames in the decoder
 672      * (depending on the bframes setting).
 673      */
 674     int pos = 0;
 675     do {
 676         uint8_t *pout;
 677         int pout_len;
 678         int len = av_parser_parse( pv->parser, pv->context, &pout, &pout_len,
 679                                    data + pos, size - pos, pts, dts );
 680         pos += len;
 681
 682         if ( pout_len > 0 )
 683         {
 684             pv->pts = pv->parser->pts;
 685             decodeFrame( pv, pout, pout_len );
 686         }
 687     } while ( pos < size );
 688
 689     /* the stuff above flushed the parser, now flush the decoder */
 690     if ( size <= 0 )
 691     {
 692         while ( decodeFrame( pv, NULL, 0 ) )
 693         {
 694         }
 695         flushDelayQueue( pv );
 696     }
 697 }
 698
 699 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 700 {
 701     hb_buffer_t *head = hb_list_item( pv->list, 0 );
 702
 703     if ( head )
 704     {
 705         hb_list_rem( pv->list, head );
 706
 707         hb_buffer_t *last = head, *buf;
 708
 709         while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
 710         {
 711             hb_list_rem( pv->list, buf );
 712             last->next = buf;
 713             last = buf;
 714         }
 715     }
 716     return head;
 717 }
 718
 719
 720 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
 721 {
 722
 723     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
 724     w->private_data = pv;
 725     pv->job   = job;
 726     pv->list = hb_list_init();
 727
 728     int codec_id = w->codec_param;
 729     pv->parser = av_parser_init( codec_id );
 730     pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
 731
 732     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
 733     pv->context->opaque = pv;
 734     pv->context->get_buffer = get_frame_buf;
 735
 736     return 0;
 737 }
 738
 739 static int next_hdr( hb_buffer_t *in, int offset )
 740 {
 741     uint8_t *dat = in->data;
 742     uint16_t last2 = 0xffff;
 743     for ( ; in->size - offset > 1; ++offset )
 744     {
 745         if ( last2 == 0 && dat[offset] == 0x01 )
 746             // found an mpeg start code
 747             return offset - 2;
 748
 749         last2 = ( last2 << 8 ) | dat[offset];
 750     }
 751
 752     return -1;
 753 }
 754
 755 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
 756 {
 757     if ( in->size - offset < 4 )
 758         // not enough room for an mpeg start code
 759         return -1;
 760
 761     for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
 762     {
 763         if ( in->data[offset+3] == hdr_type )
 764             // found it
 765             break;
 766     }
 767     return offset;
 768 }
 769
 770 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
 771 {
 772     hb_work_private_t *pv = w->private_data;
 773
 774     // we can't call the avstream funcs but the read_header func in the
 775     // AVInputFormat may set up some state in the AVContext. In particular
 776     // vc1t_read_header allocates 'extradata' to deal with header issues
 777     // related to Microsoft's bizarre engineering notions. We alloc a chunk
 778     // of space to make vc1 work then associate the codec with the context.
 779     if ( w->codec_param != CODEC_ID_VC1 )
 780     {
 781         // we haven't been inflicted with M$ - allocate a little space as
 782         // a marker and return success.
 783         pv->context->extradata_size = 16;
 784         pv->context->extradata = av_malloc(pv->context->extradata_size);
 785         return 0;
 786     }
 787
 788     // find the start and and of the sequence header
 789     int shdr, shdr_end;
 790     if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
 791     {
 792         // didn't find start of seq hdr
 793         return 1;
 794     }
 795     if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
 796     {
 797         shdr_end = in->size;
 798     }
 799     shdr_end -= shdr;
 800
 801     // find the start and and of the entry point header
 802     int ehdr, ehdr_end;
 803     if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
 804     {
 805         // didn't find start of entry point hdr
 806         return 1;
 807     }
 808     if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
 809     {
 810         ehdr_end = in->size;
 811     }
 812     ehdr_end -= ehdr;
 813
 814     // found both headers - allocate an extradata big enough to hold both
 815     // then copy them into it.
 816     pv->context->extradata_size = shdr_end + ehdr_end;
 817     pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
 818     memcpy( pv->context->extradata, in->data + shdr, shdr_end );
 819     memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
 820     memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
 821     return 0;
 822 }
 823
 824 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 825                             hb_buffer_t ** buf_out )
 826 {
 827     hb_work_private_t *pv = w->private_data;
 828     hb_buffer_t *in = *buf_in;
 829     int64_t pts = AV_NOPTS_VALUE;
 830     int64_t dts = pts;
 831
 832     *buf_in = NULL;
 833
 834     /* if we got an empty buffer signaling end-of-stream send it downstream */
 835     if ( in->size == 0 )
 836     {
 837         decodeVideo( pv, in->data, in->size, pts, dts );
 838         hb_list_add( pv->list, in );
 839         *buf_out = link_buf_list( pv );
 840         return HB_WORK_DONE;
 841     }
 842
 843     // if this is the first frame open the codec (we have to wait for the
 844     // first frame because of M$ VC1 braindamage).
 845     if ( pv->context->extradata_size == 0 )
 846     {
 847         if ( setup_extradata( w, in ) )
 848         {
 849             // we didn't find the headers needed to set up extradata.
 850             // the codec will abort if we open it so just free the buf
 851             // and hope we eventually get the info we need.
 852             hb_buffer_close( &in );
 853             return HB_WORK_OK;
 854         }
 855         AVCodec *codec = avcodec_find_decoder( w->codec_param );
 856         // There's a mis-feature in ffmpeg that causes the context to be
 857         // incorrectly initialized the 1st time avcodec_open is called.
 858         // If you close it and open a 2nd time, it finishes the job.
 859         hb_avcodec_open( pv->context, codec );
 860         hb_avcodec_close( pv->context );
 861         hb_avcodec_open( pv->context, codec );
 862     }
 863
 864     if( in->start >= 0 )
 865     {
 866         pts = in->start;
 867         dts = in->renderOffset;
 868     }
 869     if ( in->new_chap )
 870     {
 871         pv->new_chap = in->new_chap;
 872         pv->chap_time = pts >= 0? pts : pv->pts_next;
 873     }
 874     decodeVideo( pv, in->data, in->size, pts, dts );
 875     hb_buffer_close( &in );
 876     *buf_out = link_buf_list( pv );
 877     return HB_WORK_OK;
 878 }
 879
 880 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
 881 {
 882     hb_work_private_t *pv = w->private_data;
 883
 884     memset( info, 0, sizeof(*info) );
 885
 886     if ( pv && pv->context )
 887     {
 888         AVCodecContext *context = pv->context;
 889         info->bitrate = context->bit_rate;
 890         info->width = context->width;
 891         info->height = context->height;
 892
 893         /* ffmpeg gives the frame rate in frames per second while HB wants
 894          * it in units of the 27MHz MPEG clock. */
 895         info->rate = 27000000;
 896         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
 897                           context->time_base.den;
 898
 899         /* Sometimes there's no pixel aspect set in the source. In that case,
 900            assume a 1:1 PAR. Otherwise, preserve the source PAR.             */
 901         info->pixel_aspect_width = context->sample_aspect_ratio.num ?
 902                                         context->sample_aspect_ratio.num : 1;
 903         info->pixel_aspect_height = context->sample_aspect_ratio.den ?
 904                                         context->sample_aspect_ratio.den : 1;
 905
 906         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
 907          * Display Aspect Ratio so we convert by scaling by the Storage
 908          * Aspect Ratio (w/h). We do the calc in floating point to get the
 909          * rounding right. */
 910         info->aspect = (double)info->pixel_aspect_width *
 911                        (double)context->width /
 912                        (double)info->pixel_aspect_height /
 913                        (double)context->height;
 914
 915         info->profile = context->profile;
 916         info->level = context->level;
 917         info->name = context->codec->name;
 918         return 1;
 919     }
 920     return 0;
 921 }
 922
 923 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 924                              hb_work_info_t *info )
 925 {
 926     return 0;
 927 }
 928
 929 hb_work_object_t hb_decavcodecv =
 930 {
 931     WORK_DECAVCODECV,
 932     "Video decoder (libavcodec)",
 933     decavcodecvInit,
 934     decavcodecvWork,
 935     decavcodecClose,
 936     decavcodecvInfo,
 937     decavcodecvBSInfo
 938 };
 939
 940
 941 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
 942 // includes a parser and passes information from the parser to the decoder
 943 // via a codec context kept in the AVStream of the reader's AVFormatContext.
 944 // We *have* to use that codec context to decode the stream or we'll get
 945 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
 946 // codec context in our codec_param.
 947
 948 // this routine gets the appropriate context pointer from the ffmpeg
 949 // stream reader. it can't be called until we get the first buffer because
 950 // we can't guarantee that reader will be called before the our init
 951 // routine and if our init is called first we'll get a pointer to the
 952 // old scan stream (which has already been closed).
 953 static void init_ffmpeg_context( hb_work_object_t *w )
 954 {
 955     hb_work_private_t *pv = w->private_data;
 956     pv->context = hb_ffmpeg_context( w->codec_param );
 957
 958     // during scan the decoder gets closed & reopened which will
 959     // close the codec so reopen it if it's not there
 960     if ( ! pv->context->codec )
 961     {
 962         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
 963         hb_avcodec_open( pv->context, codec );
 964     }
 965     // set up our best guess at the frame duration.
 966     // the frame rate in the codec is usually bogus but it's sometimes
 967     // ok in the stream.
 968     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 969
 970     if ( st->nb_frames && st->duration )
 971     {
 972         // compute the average frame duration from the total number
 973         // of frames & the total duration.
 974         pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
 975                        ( (double)st->nb_frames * (double)st->time_base.den );
 976     }
 977     else
 978     {
 979         // XXX We don't have a frame count or duration so try to use the
 980         // far less reliable time base info in the stream.
 981         // Because the time bases are so screwed up, we only take values
 982         // in the range 8fps - 64fps.
 983         AVRational tb;
 984         if ( st->time_base.num * 64 > st->time_base.den &&
 985              st->time_base.den > st->time_base.num * 8 )
 986         {
 987             tb = st->time_base;
 988         }
 989         else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
 990                   st->r_frame_rate.num > st->r_frame_rate.den * 8 )
 991         {
 992             tb.num = st->r_frame_rate.den;
 993             tb.den = st->r_frame_rate.num;
 994         }
 995         else
 996         {
 997             tb.num = 1001;  /*XXX*/
 998             tb.den = 24000; /*XXX*/
 999         }
1000         pv->duration =  (double)tb.num / (double)tb.den;
1001     }
1002     pv->duration *= 90000.;
1003
1004     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
1005     pv->context->opaque = pv;
1006     pv->context->get_buffer = get_frame_buf;
1007
1008     // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1009     // b-frames abortion that messes up frame ordering and timestamps.
1010     // XXX ffmpeg knows which streams are broken but doesn't expose the
1011     //     info externally. We should patch ffmpeg to add a flag to the
1012     //     codec context for this but until then we mark all ffmpeg streams
1013     //     as suspicious.
1014     pv->brokenByMicrosoft = 1;
1015 }
1016
1017 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1018 {
1019     // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1020     // will seg fault in odd, data dependent ways if it's not there. (my guess
1021     // is this is a case of a local performance optimization creating a global
1022     // performance degradation since all the time wasted by extraneous data
1023     // copies & memory zeroing has to be huge compared to the minor reduction
1024     // in inner-loop instructions this affords - modern cpus bottleneck on
1025     // memory bandwidth not instruction bandwidth).
1026     if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1027     {
1028         // have to realloc to add the padding
1029         hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
1030     }
1031     memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
1032 }
1033
1034 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
1035 {
1036
1037     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1038     w->private_data = pv;
1039     pv->job   = job;
1040     pv->list = hb_list_init();
1041     pv->pts_next = -1;
1042     pv->pts = -1;
1043     return 0;
1044 }
1045
1046 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1047                              hb_buffer_t ** buf_out )
1048 {
1049     hb_work_private_t *pv = w->private_data;
1050     if ( ! pv->context )
1051     {
1052         init_ffmpeg_context( w );
1053     }
1054     hb_buffer_t *in = *buf_in;
1055     *buf_in = NULL;
1056
1057     /* if we got an empty buffer signaling end-of-stream send it downstream */
1058     if ( in->size == 0 )
1059     {
1060         /* flush any frames left in the decoder */
1061         while ( decodeFrame( pv, NULL, 0 ) )
1062         {
1063         }
1064         flushDelayQueue( pv );
1065         hb_list_add( pv->list, in );
1066         *buf_out = link_buf_list( pv );
1067         return HB_WORK_DONE;
1068     }
1069
1070     int64_t pts = in->start;
1071     if( pts >= 0 )
1072     {
1073         // use the first timestamp as our 'next expected' pts
1074         if ( pv->pts_next < 0 )
1075         {
1076             pv->pts_next = pts;
1077         }
1078         pv->pts = pts;
1079     }
1080
1081     if ( in->new_chap )
1082     {
1083         pv->new_chap = in->new_chap;
1084         pv->chap_time = pts >= 0? pts : pv->pts_next;
1085     }
1086     prepare_ffmpeg_buffer( in );
1087     decodeFrame( pv, in->data, in->size );
1088     hb_buffer_close( &in );
1089     *buf_out = link_buf_list( pv );
1090     return HB_WORK_OK;
1091 }
1092
1093 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1094 {
1095     if ( decavcodecvInfo( w, info ) )
1096     {
1097         hb_work_private_t *pv = w->private_data;
1098         if ( ! pv->context )
1099         {
1100             init_ffmpeg_context( w );
1101         }
1102         // we have the frame duration in units of the 90KHz pts clock but
1103         // need it in units of the 27MHz MPEG clock. */
1104         info->rate = 27000000;
1105         info->rate_base = pv->duration * 300.;
1106         return 1;
1107     }
1108     return 0;
1109 }
1110
1111 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
1112 {
1113     AVCodecContext *context = pv->context;
1114     int pos = 0;
1115
1116     while ( pos < size )
1117     {
1118         int16_t *buffer = pv->buffer;
1119         if ( buffer == NULL )
1120         {
1121             // XXX ffmpeg bug workaround
1122             // malloc a buffer for the audio decode. On an x86, ffmpeg
1123             // uses mmx/sse instructions on this buffer without checking
1124             // that it's 16 byte aligned and this will cause an abort if
1125             // the buffer is allocated on our stack. Rather than doing
1126             // complicated, machine dependent alignment here we use the
1127             // fact that malloc returns an aligned pointer on most architectures.
1128
1129             #ifdef SYS_CYGWIN
1130                 // Cygwin's malloc doesn't appear to return 16-byte aligned memory so use memalign instead.
1131                pv->buffer = memalign(16, AVCODEC_MAX_AUDIO_FRAME_SIZE);
1132             #else
1133                 pv->buffer = malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1134             #endif
1135
1136             buffer = pv->buffer;
1137         }
1138         int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1139         int len = avcodec_decode_audio2( context, buffer, &out_size,
1140                                          data + pos, size - pos );
1141         if ( len <= 0 )
1142         {
1143             return;
1144         }
1145         pos += len;
1146         if( out_size > 0 )
1147         {
1148             // We require signed 16-bit ints for the output format. If
1149             // we got something different convert it.
1150             if ( context->sample_fmt != SAMPLE_FMT_S16 )
1151             {
1152                 // Note: av_audio_convert seems to be a work-in-progress but
1153                 //       looks like it will eventually handle general audio
1154                 //       mixdowns which would allow us much more flexibility
1155                 //       in handling multichannel audio in HB. If we were doing
1156                 //       anything more complicated than a one-for-one format
1157                 //       conversion we'd probably want to cache the converter
1158                 //       context in the pv.
1159                 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1160                 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1161                                                               context->sample_fmt, 1,
1162                                                               NULL, 0 );
1163                 // get output buffer size (in 2-byte samples) then malloc a buffer
1164                 out_size = ( out_size * 2 ) / isamp;
1165                 buffer = malloc( out_size );
1166
1167                 // we're doing straight sample format conversion which behaves as if
1168                 // there were only one channel.
1169                 const void * const ibuf[6] = { pv->buffer };
1170                 void * const obuf[6] = { buffer };
1171                 const int istride[6] = { isamp };
1172                 const int ostride[6] = { 2 };
1173
1174                 av_audio_convert( ctx, obuf, ostride, ibuf, istride, out_size >> 1 );
1175                 av_audio_convert_free( ctx );
1176             }
1177             hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
1178
1179             // convert from bytes to total samples
1180             out_size >>= 1;
1181
1182             double pts = pv->pts_next;
1183             buf->start = pts;
1184             pts += out_size * pv->duration;
1185             buf->stop  = pts;
1186             pv->pts_next = pts;
1187
1188             float *fl32 = (float *)buf->data;
1189             int i;
1190             for( i = 0; i < out_size; ++i )
1191             {
1192                 fl32[i] = buffer[i];
1193             }
1194             hb_list_add( pv->list, buf );
1195
1196             // if we allocated a buffer for sample format conversion, free it
1197             if ( buffer != pv->buffer )
1198             {
1199                 free( buffer );
1200             }
1201         }
1202     }
1203 }
1204
1205 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1206                     hb_buffer_t **buf_out )
1207 {
1208     if ( (*buf_in)->size <= 0 )
1209     {
1210         /* EOF on input stream - send it downstream & say that we're done */
1211         *buf_out = *buf_in;
1212         *buf_in = NULL;
1213         return HB_WORK_DONE;
1214     }
1215
1216     hb_work_private_t *pv = w->private_data;
1217
1218     if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1219     {
1220         // discard buffers that start before video time 0
1221         *buf_out = NULL;
1222         return HB_WORK_OK;
1223     }
1224
1225     if ( ! pv->context )
1226     {
1227         init_ffmpeg_context( w );
1228         // duration is a scaling factor to go from #bytes in the decoded
1229         // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1230         // total samples to per-channel samples. 'sample_rate' converts
1231         // per-channel samples to seconds per sample and the 90000
1232         // is mpeg ticks per second.
1233         pv->duration = 90000. /
1234                     (double)( pv->context->sample_rate * pv->context->channels );
1235     }
1236     hb_buffer_t *in = *buf_in;
1237
1238     // if the packet has a timestamp use it if we don't have a timestamp yet
1239     // or if there's been a timing discontinuity of more than 100ms.
1240     if ( in->start >= 0 &&
1241          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1242     {
1243         pv->pts_next = in->start;
1244     }
1245     prepare_ffmpeg_buffer( in );
1246     decodeAudio( pv, in->data, in->size );
1247     *buf_out = link_buf_list( pv );
1248
1249     return HB_WORK_OK;
1250 }
1251
1252 hb_work_object_t hb_decavcodecvi =
1253 {
1254     WORK_DECAVCODECVI,
1255     "Video decoder (ffmpeg streams)",
1256     decavcodecviInit,
1257     decavcodecviWork,
1258     decavcodecClose,
1259     decavcodecviInfo,
1260     decavcodecvBSInfo
1261 };
1262
1263 hb_work_object_t hb_decavcodecai =
1264 {
1265     WORK_DECAVCODECAI,
1266     "Audio decoder (ffmpeg streams)",
1267     decavcodecviInit,
1268     decavcodecaiWork,
1269     decavcodecClose,
1270     decavcodecInfo,
1271     decavcodecBSInfo
1272 };