libhb/decavcodec.c

   1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 /* This module is Handbrake's interface to the ffmpeg decoder library
   8    (libavcodec & small parts of libavformat). It contains four Handbrake
   9    "work objects":
  10
  11     decavcodec  connects HB to an ffmpeg audio decoder
  12     decavcodecv connects HB to an ffmpeg video decoder
  13
  14         (Two different routines are needed because the ffmpeg library
  15         has different decoder calling conventions for audio & video.
  16         The audio decoder should have had its name changed to "decavcodeca"
  17         but I got lazy.) These work objects are self-contained & follow all
  18         of HB's conventions for a decoder module. They can be used like
  19         any other HB decoder (deca52, decmpeg2, etc.).
  20
  21     decavcodecai "internal" (incestuous?) version of decavcodec
  22     decavcodecvi "internal" (incestuous?) version of decavcodecv
  23
  24         These routines are functionally equivalent to the routines above but
  25         can only be used by the ffmpeg-based stream reader in libhb/stream.c.
  26         The reason they exist is because the ffmpeg library leaves some of
  27         the information needed by the decoder in the AVStream (the data
  28         structure used by the stream reader) and we need to retrieve it
  29         to successfully decode frames. But in HB the reader and decoder
  30         modules are in completely separate threads and nothing goes between
  31         them but hb_buffers containing frames to be decoded. I.e., there's
  32         no easy way for the ffmpeg stream reader to pass a pointer to its
  33         AVStream over to the ffmpeg video or audio decoder. So the *i work
  34         objects use a private back door to the stream reader to get access
  35         to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
  36         and the codec_param passed to these work objects is the key to this
  37         back door (it's basically an index that allows the correct AVStream
  38         to be retrieved).
  39
  40     The normal & *i objects share a lot of code (the basic frame decoding
  41     and bitstream info code is factored out into subroutines that can be
  42     called by either) but the top level routines of the *i objects
  43     (decavcodecviWork, decavcodecviInfo, etc.) are different because:
  44      1) they *have* to use the AVCodecContext that's contained in the
  45         reader's AVStream rather than just allocating & using their own,
  46      2) the Info routines have access to stuff kept in the AVStream in addition
  47         to stuff kept in the AVCodecContext. This shouldn't be necessary but
  48         crucial information like video frame rate that should be in the
  49         AVCodecContext is either missing or wrong in the version of ffmpeg
  50         we're currently using.
  51
  52     A consequence of the above is that the non-i work objects *can't* use
  53     information from the AVStream because there isn't one - they get their
  54     data from either the dvd reader or the mpeg reader, not the ffmpeg stream
  55     reader. That means that they have to make up for deficiencies in the
  56     AVCodecContext info by using stuff kept in the HB "title" struct. It
  57     also means that ffmpeg codecs that randomly scatter state needed by
  58     the decoder across both the AVCodecContext & the AVStream (e.g., the
  59     VC1 decoder) can't easily be used by the HB mpeg stream reader.
  60  */
  61
  62 #include "hb.h"
  63 #include "hbffmpeg.h"
  64
  65 //#include "libavcodec/audioconvert.h"
  66 #include "../contrib/ffmpeg/libavcodec/audioconvert.h"
  67
  68 static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
  69 static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  70 static void decavcodecClose( hb_work_object_t * );
  71 static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
  72 static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );
  73
  74 hb_work_object_t hb_decavcodec =
  75 {
  76     WORK_DECAVCODEC,
  77     "MPGA decoder (libavcodec)",
  78     decavcodecInit,
  79     decavcodecWork,
  80     decavcodecClose,
  81     decavcodecInfo,
  82     decavcodecBSInfo
  83 };
  84
  85 #define HEAP_SIZE 8
  86 typedef struct {
  87     // there are nheap items on the heap indexed 1..nheap (i.e., top of
  88     // heap is 1). The 0th slot is unused - a marker is put there to check
  89     // for overwrite errs.
  90     int64_t h[HEAP_SIZE+1];
  91     int     nheap;
  92 } pts_heap_t;
  93
  94 struct hb_work_private_s
  95 {
  96     hb_job_t        *job;
  97     AVCodecContext  *context;
  98     AVCodecParserContext *parser;
  99     hb_list_t       *list;
 100     double          duration;   // frame duration (for video)
 101     double          pts_next;   // next pts we expect to generate
 102     int64_t         pts;        // (video) pts passing from parser to decoder
 103     int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
 104     int             new_chap;   // output chapter mark pending
 105     uint32_t        nframes;
 106     uint32_t        ndrops;
 107     uint32_t        decode_errors;
 108     int             brokenByMicrosoft; // video stream may contain packed b-frames
 109     hb_buffer_t*    delayq[HEAP_SIZE];
 110     pts_heap_t      pts_heap;
 111     void*           buffer;
 112     struct SwsContext *sws_context; // if we have to rescale or convert color space
 113 };
 114
 115 static int64_t heap_pop( pts_heap_t *heap )
 116 {
 117     int64_t result;
 118
 119     if ( heap->nheap <= 0 )
 120     {
 121         return -1;
 122     }
 123
 124     // return the top of the heap then put the bottom element on top,
 125     // decrease the heap size by one & rebalence the heap.
 126     result = heap->h[1];
 127
 128     int64_t v = heap->h[heap->nheap--];
 129     int parent = 1;
 130     int child = parent << 1;
 131     while ( child <= heap->nheap )
 132     {
 133         // find the smallest of the two children of parent
 134         if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
 135             ++child;
 136
 137         if (v <= heap->h[child])
 138             // new item is smaller than either child so it's the new parent.
 139             break;
 140
 141         // smallest child is smaller than new item so move it up then
 142         // check its children.
 143         int64_t hp = heap->h[child];
 144         heap->h[parent] = hp;
 145         parent = child;
 146         child = parent << 1;
 147     }
 148     heap->h[parent] = v;
 149     return result;
 150 }
 151
 152 static void heap_push( pts_heap_t *heap, int64_t v )
 153 {
 154     if ( heap->nheap < HEAP_SIZE )
 155     {
 156         ++heap->nheap;
 157     }
 158
 159     // stick the new value on the bottom of the heap then bubble it
 160     // up to its correct spot.
 161         int child = heap->nheap;
 162         while (child > 1) {
 163                 int parent = child >> 1;
 164                 if (heap->h[parent] <= v)
 165                         break;
 166                 // move parent down
 167                 int64_t hp = heap->h[parent];
 168                 heap->h[child] = hp;
 169                 child = parent;
 170         }
 171         heap->h[child] = v;
 172 }
 173
 174
 175 /***********************************************************************
 176  * hb_work_decavcodec_init
 177  ***********************************************************************
 178  *
 179  **********************************************************************/
 180 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 181 {
 182     AVCodec * codec;
 183
 184     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 185     w->private_data = pv;
 186
 187     pv->job   = job;
 188
 189     int codec_id = w->codec_param;
 190     /*XXX*/
 191     if ( codec_id == 0 )
 192         codec_id = CODEC_ID_MP2;
 193
 194     codec = avcodec_find_decoder( codec_id );
 195     pv->parser = av_parser_init( codec_id );
 196
 197     pv->context = avcodec_alloc_context();
 198     hb_avcodec_open( pv->context, codec );
 199
 200     return 0;
 201 }
 202
 203 /***********************************************************************
 204  * Close
 205  ***********************************************************************
 206  *
 207  **********************************************************************/
 208 static void decavcodecClose( hb_work_object_t * w )
 209 {
 210     hb_work_private_t * pv = w->private_data;
 211
 212     if ( pv )
 213     {
 214         if ( pv->job && pv->context && pv->context->codec )
 215         {
 216             hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
 217                     pv->context->codec->name, pv->nframes, pv->decode_errors,
 218                     pv->ndrops );
 219         }
 220         if ( pv->sws_context )
 221         {
 222             sws_freeContext( pv->sws_context );
 223         }
 224         if ( pv->parser )
 225         {
 226             av_parser_close(pv->parser);
 227         }
 228         if ( pv->context && pv->context->codec )
 229         {
 230             hb_avcodec_close( pv->context );
 231         }
 232         if ( pv->list )
 233         {
 234             hb_list_close( &pv->list );
 235         }
 236         if ( pv->buffer )
 237         {
 238             free( pv->buffer );
 239             pv->buffer = NULL;
 240         }
 241         free( pv );
 242         w->private_data = NULL;
 243     }
 244 }
 245
 246 /***********************************************************************
 247  * Work
 248  ***********************************************************************
 249  *
 250  **********************************************************************/
 251 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 252                     hb_buffer_t ** buf_out )
 253 {
 254     hb_work_private_t * pv = w->private_data;
 255     hb_buffer_t * in = *buf_in, * buf, * last = NULL;
 256     int   pos, len, out_size, i, uncompressed_len;
 257     short buffer[AVCODEC_MAX_AUDIO_FRAME_SIZE];
 258     uint64_t cur;
 259     unsigned char *parser_output_buffer;
 260     int parser_output_buffer_len;
 261
 262     if ( (*buf_in)->size <= 0 )
 263     {
 264         /* EOF on input stream - send it downstream & say that we're done */
 265         *buf_out = *buf_in;
 266         *buf_in = NULL;
 267         return HB_WORK_DONE;
 268     }
 269
 270     *buf_out = NULL;
 271
 272     if ( in->start < -1 && pv->pts_next <= 0 )
 273     {
 274         // discard buffers that start before video time 0
 275         return HB_WORK_OK;
 276     }
 277
 278     cur = ( in->start < 0 )? pv->pts_next : in->start;
 279
 280     pos = 0;
 281     while( pos < in->size )
 282     {
 283         len = av_parser_parse( pv->parser, pv->context,
 284                                &parser_output_buffer, &parser_output_buffer_len,
 285                                in->data + pos, in->size - pos, cur, cur );
 286         out_size = 0;
 287         uncompressed_len = 0;
 288         if (parser_output_buffer_len)
 289         {
 290             out_size = sizeof(buffer);
 291             uncompressed_len = avcodec_decode_audio2( pv->context, buffer,
 292                                                       &out_size,
 293                                                       parser_output_buffer,
 294                                                       parser_output_buffer_len );
 295         }
 296         if( out_size )
 297         {
 298             short * s16;
 299             float * fl32;
 300
 301             buf = hb_buffer_init( 2 * out_size );
 302
 303             int sample_size_in_bytes = 2;   // Default to 2 bytes
 304             switch (pv->context->sample_fmt)
 305             {
 306               case SAMPLE_FMT_S16:
 307                 sample_size_in_bytes = 2;
 308                 break;
 309               /* We should handle other formats here - but that needs additional format conversion work below */
 310               /* For now we'll just report the error and try to carry on */
 311               default:
 312                 hb_log("decavcodecWork - Unknown Sample Format from avcodec_decode_audio (%d) !", pv->context->sample_fmt);
 313                 break;
 314             }
 315
 316             buf->start = cur;
 317             buf->stop  = cur + 90000 * ( out_size / (sample_size_in_bytes * pv->context->channels) ) /
 318                          pv->context->sample_rate;
 319             cur = buf->stop;
 320
 321             s16  = buffer;
 322             fl32 = (float *) buf->data;
 323             for( i = 0; i < out_size / 2; i++ )
 324             {
 325                 fl32[i] = s16[i];
 326             }
 327
 328             if( last )
 329             {
 330                 last = last->next = buf;
 331             }
 332             else
 333             {
 334                 *buf_out = last = buf;
 335             }
 336         }
 337
 338         pos += len;
 339     }
 340
 341     pv->pts_next = cur;
 342
 343     return HB_WORK_OK;
 344 }
 345
 346 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
 347 {
 348     hb_work_private_t *pv = w->private_data;
 349
 350     memset( info, 0, sizeof(*info) );
 351
 352     if ( pv && pv->context )
 353     {
 354         AVCodecContext *context = pv->context;
 355         info->bitrate = context->bit_rate;
 356         info->rate = context->time_base.num;
 357         info->rate_base = context->time_base.den;
 358         info->profile = context->profile;
 359         info->level = context->level;
 360         return 1;
 361     }
 362     return 0;
 363 }
 364
 365 static const int chan2layout[] = {
 366     HB_INPUT_CH_LAYOUT_MONO,  // We should allow no audio really.
 367     HB_INPUT_CH_LAYOUT_MONO,
 368     HB_INPUT_CH_LAYOUT_STEREO,
 369     HB_INPUT_CH_LAYOUT_2F1R,
 370     HB_INPUT_CH_LAYOUT_2F2R,
 371     HB_INPUT_CH_LAYOUT_3F2R,
 372     HB_INPUT_CH_LAYOUT_4F2R,
 373     HB_INPUT_CH_LAYOUT_STEREO,
 374     HB_INPUT_CH_LAYOUT_STEREO,
 375 };
 376
 377 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 378                              hb_work_info_t *info )
 379 {
 380     hb_work_private_t *pv = w->private_data;
 381     int ret = 0;
 382
 383     memset( info, 0, sizeof(*info) );
 384
 385     if ( pv && pv->context )
 386     {
 387         return decavcodecInfo( w, info );
 388     }
 389     // XXX
 390     // We should parse the bitstream to find its parameters but for right
 391     // now we just return dummy values if there's a codec that will handle it.
 392     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
 393                                                            CODEC_ID_MP2 );
 394     if ( ! codec )
 395     {
 396         // there's no ffmpeg codec for this audio type - give up
 397         return -1;
 398     }
 399
 400     static char codec_name[64];
 401     info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
 402
 403     AVCodecParserContext *parser = av_parser_init( codec->id );
 404     AVCodecContext *context = avcodec_alloc_context();
 405     hb_avcodec_open( context, codec );
 406 #ifdef SYS_CYGWIN
 407     uint8_t *buffer = memalign(16, AVCODEC_MAX_AUDIO_FRAME_SIZE);
 408 #else
 409     uint8_t *buffer = malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 410 #endif
 411     int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 412     unsigned char *pbuffer;
 413     int pos = 0, pbuffer_size;
 414
 415     while ( pos < buf->size )
 416     {
 417         int len = av_parser_parse( parser, context, &pbuffer, &pbuffer_size,
 418                                    buf->data + pos, buf->size - pos,
 419                                    buf->start, buf->start );
 420         pos += len;
 421         if ( pbuffer_size > 0 )
 422         {
 423             len = avcodec_decode_audio2( context, (int16_t*)buffer, &out_size,
 424                                          pbuffer, pbuffer_size );
 425             if ( len > 0 && context->sample_rate > 0 )
 426             {
 427                 info->bitrate = context->bit_rate;
 428                 info->rate = context->sample_rate;
 429                 info->rate_base = 1;
 430                 info->channel_layout = chan2layout[context->channels & 7];
 431                 ret = 1;
 432                 break;
 433             }
 434         }
 435     }
 436     free( buffer );
 437     av_parser_close( parser );
 438     hb_avcodec_close( context );
 439     return ret;
 440 }
 441
 442 /* -------------------------------------------------------------
 443  * General purpose video decoder using libavcodec
 444  */
 445
 446 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
 447                             int h )
 448 {
 449     if ( dstride == sstride )
 450     {
 451         memcpy( dst, src, dstride * h );
 452         return dst + dstride * h;
 453     }
 454     int lbytes = dstride <= sstride? dstride : sstride;
 455     while ( --h >= 0 )
 456     {
 457         memcpy( dst, src, lbytes );
 458         src += sstride;
 459         dst += dstride;
 460     }
 461     return dst;
 462 }
 463
 464 // copy one video frame into an HB buf. If the frame isn't in our color space
 465 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
 466 // Otherwise just copy the bits.
 467 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 468 {
 469     AVCodecContext *context = pv->context;
 470     int w, h;
 471     if ( ! pv->job )
 472     {
 473         // if the dimensions are odd, drop the lsb since h264 requires that
 474         // both width and height be even.
 475         w = ( context->width >> 1 ) << 1;
 476         h = ( context->height >> 1 ) << 1;
 477     }
 478     else
 479     {
 480         w =  pv->job->title->width;
 481         h =  pv->job->title->height;
 482     }
 483     hb_buffer_t *buf = hb_video_buffer_init( w, h );
 484     uint8_t *dst = buf->data;
 485
 486     if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
 487          h != context->height )
 488     {
 489         // have to convert to our internal color space and/or rescale
 490         AVPicture dstpic;
 491         avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 492
 493         if ( ! pv->sws_context )
 494         {
 495             pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
 496                                               w, h, PIX_FMT_YUV420P,
 497                                               SWS_LANCZOS|SWS_ACCURATE_RND,
 498                                               NULL, NULL, NULL );
 499         }
 500         sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
 501                    dstpic.data, dstpic.linesize );
 502     }
 503     else
 504     {
 505         dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
 506         w = (w + 1) >> 1; h = (h + 1) >> 1;
 507         dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
 508         dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
 509     }
 510     return buf;
 511 }
 512
 513 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
 514 {
 515     hb_work_private_t *pv = context->opaque;
 516     frame->pts = pv->pts;
 517     pv->pts = -1;
 518     return avcodec_default_get_buffer( context, frame );
 519 }
 520
 521 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 522 {
 523     hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
 524     if ( c && c->title )
 525     {
 526         hb_log( "%s: \"%s\" (%d) at frame %u time %lld",
 527                 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
 528     }
 529     else
 530     {
 531         hb_log( "%s: Chapter %d at frame %u time %lld",
 532                 pv->context->codec->name, chap_num, pv->nframes, pts );
 533     }
 534 }
 535
 536 static void flushDelayQueue( hb_work_private_t *pv )
 537 {
 538     hb_buffer_t *buf;
 539     int slot = pv->nframes & (HEAP_SIZE-1);
 540
 541     // flush all the video packets left on our timestamp-reordering delay q
 542     while ( ( buf = pv->delayq[slot] ) != NULL )
 543     {
 544         buf->start = heap_pop( &pv->pts_heap );
 545         hb_list_add( pv->list, buf );
 546         pv->delayq[slot] = NULL;
 547         slot = ( slot + 1 ) & (HEAP_SIZE-1);
 548     }
 549 }
 550
 551 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
 552 {
 553     int got_picture;
 554     AVFrame frame;
 555
 556     if ( avcodec_decode_video( pv->context, &frame, &got_picture, data, size ) < 0 )
 557     {
 558         ++pv->decode_errors;
 559     }
 560     if( got_picture )
 561     {
 562         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
 563         // packet had a pts we handed it to av_parser_parse (if the packet had
 564         // no pts we set it to -1 but before the parse we can't distinguish between
 565         // the start of a video frame with no pts & an intermediate packet of
 566         // some frame which never has a pts). we hope that when parse returns
 567         // the frame to us the pts we originally handed it will be in parser->pts.
 568         // we put this pts into pv->pts so that when a avcodec_decode_video
 569         // finally gets around to allocating an AVFrame to hold the decoded
 570         // frame we can stuff that pts into the frame. if all of these relays
 571         // worked at this point frame.pts should hold the frame's pts from the
 572         // original data stream or -1 if it didn't have one. in the latter case
 573         // we generate the next pts in sequence for it.
 574         double frame_dur = pv->duration;
 575         if ( frame_dur <= 0 )
 576         {
 577             frame_dur = 90000. * (double)pv->context->time_base.num /
 578                         (double)pv->context->time_base.den;
 579             pv->duration = frame_dur;
 580         }
 581         if ( frame.repeat_pict )
 582         {
 583             frame_dur += frame.repeat_pict * frame_dur * 0.5;
 584         }
 585         // If there was no pts for this frame, assume constant frame rate
 586         // video & estimate the next frame time from the last & duration.
 587         double pts = frame.pts;
 588         if ( pts < 0 )
 589         {
 590             pts = pv->pts_next;
 591         }
 592         pv->pts_next = pts + frame_dur;
 593
 594         hb_buffer_t *buf;
 595
 596         // if we're doing a scan or this content couldn't have been broken
 597         // by Microsoft we don't worry about timestamp reordering
 598         if ( ! pv->job || ! pv->brokenByMicrosoft )
 599         {
 600             buf = copy_frame( pv, &frame );
 601             buf->start = pts;
 602             hb_list_add( pv->list, buf );
 603             ++pv->nframes;
 604             return got_picture;
 605         }
 606
 607         // XXX This following probably addresses a libavcodec bug but I don't
 608         //     see an easy fix so we workaround it here.
 609         //
 610         // The M$ 'packed B-frames' atrocity results in decoded frames with
 611         // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
 612         // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
 613         // The frames are actually delivered in the right order but with
 614         // the wrong timestamp. To get the correct timestamp attached to
 615         // each frame we have a delay queue (longer than the max number of
 616         // b-frames) & a sorting heap for the timestamps. As each frame
 617         // comes out of the decoder the oldest frame in the queue is removed
 618         // and associated with the smallest timestamp. Then the new frame is
 619         // added to the queue & its timestamp is pushed on the heap.
 620         // This does nothing if the timestamps are correct (i.e., the video
 621         // uses a codec that Micro$oft hasn't broken yet) but the frames
 622         // get timestamped correctly even when M$ has munged them.
 623
 624         // remove the oldest picture from the frame queue (if any) &
 625         // give it the smallest timestamp from our heap. The queue size
 626         // is a power of two so we get the slot of the oldest by masking
 627         // the frame count & this will become the slot of the newest
 628         // once we've removed & processed the oldest.
 629         int slot = pv->nframes & (HEAP_SIZE-1);
 630         if ( ( buf = pv->delayq[slot] ) != NULL )
 631         {
 632             buf->start = heap_pop( &pv->pts_heap );
 633
 634             if ( pv->new_chap && buf->start >= pv->chap_time )
 635             {
 636                 buf->new_chap = pv->new_chap;
 637                 pv->new_chap = 0;
 638                 pv->chap_time = 0;
 639                 log_chapter( pv, buf->new_chap, buf->start );
 640             }
 641             else if ( pv->nframes == 0 )
 642             {
 643                 log_chapter( pv, pv->job->chapter_start, buf->start );
 644             }
 645             hb_list_add( pv->list, buf );
 646         }
 647
 648         // add the new frame to the delayq & push its timestamp on the heap
 649         pv->delayq[slot] = copy_frame( pv, &frame );
 650         heap_push( &pv->pts_heap, pts );
 651
 652         ++pv->nframes;
 653     }
 654
 655     return got_picture;
 656 }
 657
 658 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
 659                          int64_t pts, int64_t dts )
 660 {
 661     /*
 662      * The following loop is a do..while because we need to handle both
 663      * data & the flush at the end (signaled by size=0). At the end there's
 664      * generally a frame in the parser & one or more frames in the decoder
 665      * (depending on the bframes setting).
 666      */
 667     int pos = 0;
 668     do {
 669         uint8_t *pout;
 670         int pout_len;
 671         int len = av_parser_parse( pv->parser, pv->context, &pout, &pout_len,
 672                                    data + pos, size - pos, pts, dts );
 673         pos += len;
 674
 675         if ( pout_len > 0 )
 676         {
 677             pv->pts = pv->parser->pts;
 678             decodeFrame( pv, pout, pout_len );
 679         }
 680     } while ( pos < size );
 681
 682     /* the stuff above flushed the parser, now flush the decoder */
 683     if ( size <= 0 )
 684     {
 685         while ( decodeFrame( pv, NULL, 0 ) )
 686         {
 687         }
 688         flushDelayQueue( pv );
 689     }
 690 }
 691
 692 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 693 {
 694     hb_buffer_t *head = hb_list_item( pv->list, 0 );
 695
 696     if ( head )
 697     {
 698         hb_list_rem( pv->list, head );
 699
 700         hb_buffer_t *last = head, *buf;
 701
 702         while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
 703         {
 704             hb_list_rem( pv->list, buf );
 705             last->next = buf;
 706             last = buf;
 707         }
 708     }
 709     return head;
 710 }
 711
 712
 713 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
 714 {
 715
 716     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
 717     w->private_data = pv;
 718     pv->job   = job;
 719     pv->list = hb_list_init();
 720
 721     int codec_id = w->codec_param;
 722     pv->parser = av_parser_init( codec_id );
 723     pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
 724
 725     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
 726     pv->context->opaque = pv;
 727     pv->context->get_buffer = get_frame_buf;
 728
 729     return 0;
 730 }
 731
 732 static int next_hdr( hb_buffer_t *in, int offset )
 733 {
 734     uint8_t *dat = in->data;
 735     uint16_t last2 = 0xffff;
 736     for ( ; in->size - offset > 1; ++offset )
 737     {
 738         if ( last2 == 0 && dat[offset] == 0x01 )
 739             // found an mpeg start code
 740             return offset - 2;
 741
 742         last2 = ( last2 << 8 ) | dat[offset];
 743     }
 744
 745     return -1;
 746 }
 747
 748 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
 749 {
 750     if ( in->size - offset < 4 )
 751         // not enough room for an mpeg start code
 752         return -1;
 753
 754     for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
 755     {
 756         if ( in->data[offset+3] == hdr_type )
 757             // found it
 758             break;
 759     }
 760     return offset;
 761 }
 762
 763 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
 764 {
 765     hb_work_private_t *pv = w->private_data;
 766
 767     // we can't call the avstream funcs but the read_header func in the
 768     // AVInputFormat may set up some state in the AVContext. In particular
 769     // vc1t_read_header allocates 'extradata' to deal with header issues
 770     // related to Microsoft's bizarre engineering notions. We alloc a chunk
 771     // of space to make vc1 work then associate the codec with the context.
 772     if ( w->codec_param != CODEC_ID_VC1 )
 773     {
 774         // we haven't been inflicted with M$ - allocate a little space as
 775         // a marker and return success.
 776         pv->context->extradata_size = 16;
 777         pv->context->extradata = av_malloc(pv->context->extradata_size);
 778         return 0;
 779     }
 780
 781     // find the start and and of the sequence header
 782     int shdr, shdr_end;
 783     if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
 784     {
 785         // didn't find start of seq hdr
 786         return 1;
 787     }
 788     if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
 789     {
 790         shdr_end = in->size;
 791     }
 792     shdr_end -= shdr;
 793
 794     // find the start and and of the entry point header
 795     int ehdr, ehdr_end;
 796     if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
 797     {
 798         // didn't find start of entry point hdr
 799         return 1;
 800     }
 801     if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
 802     {
 803         ehdr_end = in->size;
 804     }
 805     ehdr_end -= ehdr;
 806
 807     // found both headers - allocate an extradata big enough to hold both
 808     // then copy them into it.
 809     pv->context->extradata_size = shdr_end + ehdr_end;
 810     pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
 811     memcpy( pv->context->extradata, in->data + shdr, shdr_end );
 812     memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
 813     memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
 814     return 0;
 815 }
 816
 817 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 818                             hb_buffer_t ** buf_out )
 819 {
 820     hb_work_private_t *pv = w->private_data;
 821     hb_buffer_t *in = *buf_in;
 822     int64_t pts = AV_NOPTS_VALUE;
 823     int64_t dts = pts;
 824
 825     *buf_in = NULL;
 826
 827     /* if we got an empty buffer signaling end-of-stream send it downstream */
 828     if ( in->size == 0 )
 829     {
 830         decodeVideo( pv, in->data, in->size, pts, dts );
 831         hb_list_add( pv->list, in );
 832         *buf_out = link_buf_list( pv );
 833         return HB_WORK_DONE;
 834     }
 835
 836     // if this is the first frame open the codec (we have to wait for the
 837     // first frame because of M$ VC1 braindamage).
 838     if ( pv->context->extradata_size == 0 )
 839     {
 840         if ( setup_extradata( w, in ) )
 841         {
 842             // we didn't find the headers needed to set up extradata.
 843             // the codec will abort if we open it so just free the buf
 844             // and hope we eventually get the info we need.
 845             hb_buffer_close( &in );
 846             return HB_WORK_OK;
 847         }
 848         AVCodec *codec = avcodec_find_decoder( w->codec_param );
 849         // There's a mis-feature in ffmpeg that causes the context to be
 850         // incorrectly initialized the 1st time avcodec_open is called.
 851         // If you close it and open a 2nd time, it finishes the job.
 852         hb_avcodec_open( pv->context, codec );
 853         hb_avcodec_close( pv->context );
 854         hb_avcodec_open( pv->context, codec );
 855     }
 856
 857     if( in->start >= 0 )
 858     {
 859         pts = in->start;
 860         dts = in->renderOffset;
 861     }
 862     if ( in->new_chap )
 863     {
 864         pv->new_chap = in->new_chap;
 865         pv->chap_time = pts >= 0? pts : pv->pts_next;
 866     }
 867     decodeVideo( pv, in->data, in->size, pts, dts );
 868     hb_buffer_close( &in );
 869     *buf_out = link_buf_list( pv );
 870     return HB_WORK_OK;
 871 }
 872
 873 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
 874 {
 875     hb_work_private_t *pv = w->private_data;
 876
 877     memset( info, 0, sizeof(*info) );
 878
 879     if ( pv && pv->context )
 880     {
 881         AVCodecContext *context = pv->context;
 882         info->bitrate = context->bit_rate;
 883         info->width = context->width;
 884         info->height = context->height;
 885
 886         /* ffmpeg gives the frame rate in frames per second while HB wants
 887          * it in units of the 27MHz MPEG clock. */
 888         info->rate = 27000000;
 889         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
 890                           context->time_base.den;
 891
 892         /* Sometimes there's no pixel aspect set in the source. In that case,
 893            assume a 1:1 PAR. Otherwise, preserve the source PAR.             */
 894         info->pixel_aspect_width = context->sample_aspect_ratio.num ?
 895                                         context->sample_aspect_ratio.num : 1;
 896         info->pixel_aspect_height = context->sample_aspect_ratio.den ?
 897                                         context->sample_aspect_ratio.den : 1;
 898
 899         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
 900          * Display Aspect Ratio so we convert by scaling by the Storage
 901          * Aspect Ratio (w/h). We do the calc in floating point to get the
 902          * rounding right. */
 903         info->aspect = (double)info->pixel_aspect_width *
 904                        (double)context->width /
 905                        (double)info->pixel_aspect_height /
 906                        (double)context->height;
 907
 908         info->profile = context->profile;
 909         info->level = context->level;
 910         info->name = context->codec->name;
 911         return 1;
 912     }
 913     return 0;
 914 }
 915
 916 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 917                              hb_work_info_t *info )
 918 {
 919     return 0;
 920 }
 921
 922 hb_work_object_t hb_decavcodecv =
 923 {
 924     WORK_DECAVCODECV,
 925     "Video decoder (libavcodec)",
 926     decavcodecvInit,
 927     decavcodecvWork,
 928     decavcodecClose,
 929     decavcodecvInfo,
 930     decavcodecvBSInfo
 931 };
 932
 933
 934 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
 935 // includes a parser and passes information from the parser to the decoder
 936 // via a codec context kept in the AVStream of the reader's AVFormatContext.
 937 // We *have* to use that codec context to decode the stream or we'll get
 938 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
 939 // codec context in our codec_param.
 940
 941 // this routine gets the appropriate context pointer from the ffmpeg
 942 // stream reader. it can't be called until we get the first buffer because
 943 // we can't guarantee that reader will be called before the our init
 944 // routine and if our init is called first we'll get a pointer to the
 945 // old scan stream (which has already been closed).
 946 static void init_ffmpeg_context( hb_work_object_t *w )
 947 {
 948     hb_work_private_t *pv = w->private_data;
 949     pv->context = hb_ffmpeg_context( w->codec_param );
 950
 951     // during scan the decoder gets closed & reopened which will
 952     // close the codec so reopen it if it's not there
 953     if ( ! pv->context->codec )
 954     {
 955         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
 956         hb_avcodec_open( pv->context, codec );
 957     }
 958     // set up our best guess at the frame duration.
 959     // the frame rate in the codec is usually bogus but it's sometimes
 960     // ok in the stream.
 961     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 962
 963     if ( st->nb_frames && st->duration )
 964     {
 965         // compute the average frame duration from the total number
 966         // of frames & the total duration.
 967         pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
 968                        ( (double)st->nb_frames * (double)st->time_base.den );
 969     }
 970     else
 971     {
 972         // XXX We don't have a frame count or duration so try to use the
 973         // far less reliable time base info in the stream.
 974         // Because the time bases are so screwed up, we only take values
 975         // in the range 8fps - 64fps.
 976         AVRational tb;
 977         if ( st->time_base.num * 64 > st->time_base.den &&
 978              st->time_base.den > st->time_base.num * 8 )
 979         {
 980             tb = st->time_base;
 981         }
 982         else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
 983                   st->r_frame_rate.num > st->r_frame_rate.den * 8 )
 984         {
 985             tb.num = st->r_frame_rate.den;
 986             tb.den = st->r_frame_rate.num;
 987         }
 988         else
 989         {
 990             tb.num = 1001;  /*XXX*/
 991             tb.den = 24000; /*XXX*/
 992         }
 993         pv->duration =  (double)tb.num / (double)tb.den;
 994     }
 995     pv->duration *= 90000.;
 996
 997     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
 998     pv->context->opaque = pv;
 999     pv->context->get_buffer = get_frame_buf;
1000
1001     // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1002     // b-frames abortion that messes up frame ordering and timestamps.
1003     // XXX ffmpeg knows which streams are broken but doesn't expose the
1004     //     info externally. We should patch ffmpeg to add a flag to the
1005     //     codec context for this but until then we mark all ffmpeg streams
1006     //     as suspicious.
1007     pv->brokenByMicrosoft = 1;
1008 }
1009
1010 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1011 {
1012     // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1013     // will seg fault in odd, data dependent ways if it's not there. (my guess
1014     // is this is a case of a local performance optimization creating a global
1015     // performance degradation since all the time wasted by extraneous data
1016     // copies & memory zeroing has to be huge compared to the minor reduction
1017     // in inner-loop instructions this affords - modern cpus bottleneck on
1018     // memory bandwidth not instruction bandwidth).
1019     if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1020     {
1021         // have to realloc to add the padding
1022         hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
1023     }
1024     memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
1025 }
1026
1027 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
1028 {
1029
1030     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1031     w->private_data = pv;
1032     pv->job   = job;
1033     pv->list = hb_list_init();
1034     pv->pts_next = -1;
1035     pv->pts = -1;
1036     return 0;
1037 }
1038
1039 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1040                              hb_buffer_t ** buf_out )
1041 {
1042     hb_work_private_t *pv = w->private_data;
1043     if ( ! pv->context )
1044     {
1045         init_ffmpeg_context( w );
1046     }
1047     hb_buffer_t *in = *buf_in;
1048     *buf_in = NULL;
1049
1050     /* if we got an empty buffer signaling end-of-stream send it downstream */
1051     if ( in->size == 0 )
1052     {
1053         /* flush any frames left in the decoder */
1054         while ( decodeFrame( pv, NULL, 0 ) )
1055         {
1056         }
1057         flushDelayQueue( pv );
1058         hb_list_add( pv->list, in );
1059         *buf_out = link_buf_list( pv );
1060         return HB_WORK_DONE;
1061     }
1062
1063     int64_t pts = in->start;
1064     if( pts >= 0 )
1065     {
1066         // use the first timestamp as our 'next expected' pts
1067         if ( pv->pts_next < 0 )
1068         {
1069             pv->pts_next = pts;
1070         }
1071         pv->pts = pts;
1072     }
1073
1074     if ( in->new_chap )
1075     {
1076         pv->new_chap = in->new_chap;
1077         pv->chap_time = pts >= 0? pts : pv->pts_next;
1078     }
1079     prepare_ffmpeg_buffer( in );
1080     decodeFrame( pv, in->data, in->size );
1081     hb_buffer_close( &in );
1082     *buf_out = link_buf_list( pv );
1083     return HB_WORK_OK;
1084 }
1085
1086 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1087 {
1088     if ( decavcodecvInfo( w, info ) )
1089     {
1090         hb_work_private_t *pv = w->private_data;
1091         if ( ! pv->context )
1092         {
1093             init_ffmpeg_context( w );
1094         }
1095         // we have the frame duration in units of the 90KHz pts clock but
1096         // need it in units of the 27MHz MPEG clock. */
1097         info->rate = 27000000;
1098         info->rate_base = pv->duration * 300.;
1099         return 1;
1100     }
1101     return 0;
1102 }
1103
1104 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
1105 {
1106     AVCodecContext *context = pv->context;
1107     int pos = 0;
1108
1109     while ( pos < size )
1110     {
1111         int16_t *buffer = pv->buffer;
1112         if ( buffer == NULL )
1113         {
1114             // XXX ffmpeg bug workaround
1115             // malloc a buffer for the audio decode. On an x86, ffmpeg
1116             // uses mmx/sse instructions on this buffer without checking
1117             // that it's 16 byte aligned and this will cause an abort if
1118             // the buffer is allocated on our stack. Rather than doing
1119             // complicated, machine dependent alignment here we use the
1120             // fact that malloc returns an aligned pointer on most architectures.
1121
1122             #ifdef SYS_CYGWIN
1123                 // Cygwin's malloc doesn't appear to return 16-byte aligned memory so use memalign instead.
1124                pv->buffer = memalign(16, AVCODEC_MAX_AUDIO_FRAME_SIZE);
1125             #else
1126                 pv->buffer = malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1127             #endif
1128
1129             buffer = pv->buffer;
1130         }
1131         int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1132         int len = avcodec_decode_audio2( context, buffer, &out_size,
1133                                          data + pos, size - pos );
1134         if ( len <= 0 )
1135         {
1136             return;
1137         }
1138         pos += len;
1139         if( out_size > 0 )
1140         {
1141             // We require signed 16-bit ints for the output format. If
1142             // we got something different convert it.
1143             if ( context->sample_fmt != SAMPLE_FMT_S16 )
1144             {
1145                 // Note: av_audio_convert seems to be a work-in-progress but
1146                 //       looks like it will eventually handle general audio
1147                 //       mixdowns which would allow us much more flexibility
1148                 //       in handling multichannel audio in HB. If we were doing
1149                 //       anything more complicated than a one-for-one format
1150                 //       conversion we'd probably want to cache the converter
1151                 //       context in the pv.
1152                 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1153                 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1154                                                               context->sample_fmt, 1,
1155                                                               NULL, 0 );
1156                 // get output buffer size (in 2-byte samples) then malloc a buffer
1157                 out_size = ( out_size * 2 ) / isamp;
1158                 buffer = malloc( out_size );
1159
1160                 // we're doing straight sample format conversion which behaves as if
1161                 // there were only one channel.
1162                 const void * const ibuf[6] = { pv->buffer };
1163                 void * const obuf[6] = { buffer };
1164                 const int istride[6] = { isamp };
1165                 const int ostride[6] = { 2 };
1166
1167                 av_audio_convert( ctx, obuf, ostride, ibuf, istride, out_size >> 1 );
1168                 av_audio_convert_free( ctx );
1169             }
1170             hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
1171
1172             // convert from bytes to total samples
1173             out_size >>= 1;
1174
1175             double pts = pv->pts_next;
1176             buf->start = pts;
1177             pts += out_size * pv->duration;
1178             buf->stop  = pts;
1179             pv->pts_next = pts;
1180
1181             float *fl32 = (float *)buf->data;
1182             int i;
1183             for( i = 0; i < out_size; ++i )
1184             {
1185                 fl32[i] = buffer[i];
1186             }
1187             hb_list_add( pv->list, buf );
1188
1189             // if we allocated a buffer for sample format conversion, free it
1190             if ( buffer != pv->buffer )
1191             {
1192                 free( buffer );
1193             }
1194         }
1195     }
1196 }
1197
1198 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1199                     hb_buffer_t **buf_out )
1200 {
1201     if ( (*buf_in)->size <= 0 )
1202     {
1203         /* EOF on input stream - send it downstream & say that we're done */
1204         *buf_out = *buf_in;
1205         *buf_in = NULL;
1206         return HB_WORK_DONE;
1207     }
1208
1209     hb_work_private_t *pv = w->private_data;
1210
1211     if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1212     {
1213         // discard buffers that start before video time 0
1214         *buf_out = NULL;
1215         return HB_WORK_OK;
1216     }
1217
1218     if ( ! pv->context )
1219     {
1220         init_ffmpeg_context( w );
1221         // duration is a scaling factor to go from #bytes in the decoded
1222         // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1223         // total samples to per-channel samples. 'sample_rate' converts
1224         // per-channel samples to seconds per sample and the 90000
1225         // is mpeg ticks per second.
1226         pv->duration = 90000. /
1227                     (double)( pv->context->sample_rate * pv->context->channels );
1228     }
1229     hb_buffer_t *in = *buf_in;
1230
1231     // if the packet has a timestamp use it if we don't have a timestamp yet
1232     // or if there's been a timing discontinuity of more than 100ms.
1233     if ( in->start >= 0 &&
1234          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1235     {
1236         pv->pts_next = in->start;
1237     }
1238     prepare_ffmpeg_buffer( in );
1239     decodeAudio( pv, in->data, in->size );
1240     *buf_out = link_buf_list( pv );
1241
1242     return HB_WORK_OK;
1243 }
1244
1245 hb_work_object_t hb_decavcodecvi =
1246 {
1247     WORK_DECAVCODECVI,
1248     "Video decoder (ffmpeg streams)",
1249     decavcodecviInit,
1250     decavcodecviWork,
1251     decavcodecClose,
1252     decavcodecviInfo,
1253     decavcodecvBSInfo
1254 };
1255
1256 hb_work_object_t hb_decavcodecai =
1257 {
1258     WORK_DECAVCODECAI,
1259     "Audio decoder (ffmpeg streams)",
1260     decavcodecviInit,
1261     decavcodecaiWork,
1262     decavcodecClose,
1263     decavcodecInfo,
1264     decavcodecBSInfo
1265 };