1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
7 /* This module is Handbrake's interface to the ffmpeg decoder library
8 (libavcodec & small parts of libavformat). It contains four Handbrake
11 decavcodec connects HB to an ffmpeg audio decoder
12 decavcodecv connects HB to an ffmpeg video decoder
14 (Two different routines are needed because the ffmpeg library
15 has different decoder calling conventions for audio & video.
16 The audio decoder should have had its name changed to "decavcodeca"
17 but I got lazy.) These work objects are self-contained & follow all
18 of HB's conventions for a decoder module. They can be used like
19 any other HB decoder (deca52, decmpeg2, etc.).
21 decavcodecai "internal" (incestuous?) version of decavcodec
22 decavcodecvi "internal" (incestuous?) version of decavcodecv
24 These routines are functionally equivalent to the routines above but
25 can only be used by the ffmpeg-based stream reader in libhb/stream.c.
26 The reason they exist is because the ffmpeg library leaves some of
27 the information needed by the decoder in the AVStream (the data
28 structure used by the stream reader) and we need to retrieve it
29 to successfully decode frames. But in HB the reader and decoder
30 modules are in completely separate threads and nothing goes between
31 them but hb_buffers containing frames to be decoded. I.e., there's
32 no easy way for the ffmpeg stream reader to pass a pointer to its
33 AVStream over to the ffmpeg video or audio decoder. So the *i work
34 objects use a private back door to the stream reader to get access
35 to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
36 and the codec_param passed to these work objects is the key to this
37 back door (it's basically an index that allows the correct AVStream
40 The normal & *i objects share a lot of code (the basic frame decoding
41 and bitstream info code is factored out into subroutines that can be
42 called by either) but the top level routines of the *i objects
43 (decavcodecviWork, decavcodecviInfo, etc.) are different because:
44 1) they *have* to use the AVCodecContext that's contained in the
45 reader's AVStream rather than just allocating & using their own,
46 2) the Info routines have access to stuff kept in the AVStream in addition
47 to stuff kept in the AVCodecContext. This shouldn't be necessary but
48 crucial information like video frame rate that should be in the
49 AVCodecContext is either missing or wrong in the version of ffmpeg
50 we're currently using.
52 A consequence of the above is that the non-i work objects *can't* use
53 information from the AVStream because there isn't one - they get their
54 data from either the dvd reader or the mpeg reader, not the ffmpeg stream
55 reader. That means that they have to make up for deficiencies in the
56 AVCodecContext info by using stuff kept in the HB "title" struct. It
57 also means that ffmpeg codecs that randomly scatter state needed by
58 the decoder across both the AVCodecContext & the AVStream (e.g., the
59 VC1 decoder) can't easily be used by the HB mpeg stream reader.
65 #include "libavcodec/audioconvert.h"
67 static int decavcodecInit( hb_work_object_t *, hb_job_t * );
68 static int decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
69 static void decavcodecClose( hb_work_object_t * );
70 static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
71 static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );
// Work object descriptor for the audio decoder; the string below is its display name.
73 hb_work_object_t hb_decavcodec =
76 "MPGA decoder (libavcodec)",
86 // there are nheap items on the heap indexed 1..nheap (i.e., top of
87 // heap is 1). The 0th slot is unused - a marker is put there to check
88 // for overwrite errs.
// Storage for the 64-bit timestamps handled by heap_push / heap_pop:
// HEAP_SIZE usable slots plus the unused slot 0.
89 int64_t h[HEAP_SIZE+1];
// Private decoder state. Both decavcodecInit (audio) and decavcodecvInit (video)
// allocate one of these, zero-filled, and store it in w->private_data.
93 struct hb_work_private_s
96 AVCodecContext *context; // ffmpeg codec context used for decoding
97 AVCodecParserContext *parser; // ffmpeg parser; may be NULL (callers test before use)
99 double duration; // frame duration (for video)
100 double pts_next; // next pts we expect to generate
101 int64_t pts; // (video) pts passing from parser to decoder
102 int64_t chap_time; // time of next chap mark (if new_chap != 0)
103 int new_chap; // output chapter mark pending
106 uint32_t decode_errors; // reported as "decoder errors" in the close-time log line
107 int brokenByMicrosoft; // video stream may contain packed b-frames
108 hb_buffer_t* delayq[HEAP_SIZE]; // frames held back for timestamp reordering
111 struct SwsContext *sws_context; // if we have to rescale or convert color space
112 hb_downmix_t *downmix; // created by hb_downmix_init when a downmix is needed
113 hb_sample_t *downmix_buffer; // released with free() in decavcodecClose
115 hb_chan_map_t *out_map; // channel map the decoded audio is mapped to
118 static void decodeAudio( hb_audio_t * audio, hb_work_private_t *pv, uint8_t *data, int size );
119 static hb_buffer_t *link_buf_list( hb_work_private_t *pv );
// Take a timestamp off the top of the heap. The heap keeps the smallest
// value on top and is 1-based: heap->h[1] is the top, heap->h[heap->nheap]
// the bottom.
122 static int64_t heap_pop( pts_heap_t *heap )
// guard against popping from an empty heap
126 if ( heap->nheap <= 0 )
131 // return the top of the heap then put the bottom element on top,
132 // decrease the heap size by one & rebalance the heap.
// v is the old bottom element; the loop below finds its new position
135 int64_t v = heap->h[heap->nheap--];
137 int child = parent << 1;
138 while ( child <= heap->nheap )
140 // find the smallest of the two children of parent
141 if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
144 if (v <= heap->h[child])
145 // new item is smaller than either child so it's the new parent.
148 // smallest child is smaller than new item so move it up then
149 // check its children.
150 int64_t hp = heap->h[child];
151 heap->h[parent] = hp;
// Insert timestamp v into the heap, keeping the smallest value on top.
159 static void heap_push( pts_heap_t *heap, int64_t v )
// capacity check against HEAP_SIZE
161 if ( heap->nheap < HEAP_SIZE )
166 // stick the new value on the bottom of the heap then bubble it
167 // up to its correct spot.
168 int child = heap->nheap;
// the parent of slot i is slot i/2 in the 1-based layout
170 int parent = child >> 1;
// v has reached its spot once its parent is no larger than it
171 if (heap->h[parent] <= v)
174 int64_t hp = heap->h[parent];
182 /***********************************************************************
183 * hb_work_decavcodec_init
184 ***********************************************************************
186 **********************************************************************/
// Initialize the audio decoder work object: allocate the zero-filled private
// state, look up the ffmpeg decoder and parser for the codec id, allocate and
// open a codec context and, when the work object has an audio track, choose
// the output channel map and set up a downmixer if one is needed.
187 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
191 hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
192 w->private_data = pv;
// decoded buffers are collected on this list until link_buf_list drains it
195 pv->list = hb_list_init();
// codec_param carries the ffmpeg codec id for this work object
197 int codec_id = w->codec_param;
200 codec_id = CODEC_ID_MP2;
202 codec = avcodec_find_decoder( codec_id );
203 pv->parser = av_parser_init( codec_id );
205 pv->context = avcodec_alloc_context();
206 hb_avcodec_open( pv->context, codec );
208 if ( w->audio != NULL )
210 if ( w->audio->config.out.codec == HB_ACODEC_AC3 )
212 // ffmpeg's audio encoder expects an smpte chan map as input.
213 // So we need to map the decoder's output to smpte.
214 pv->out_map = &hb_smpte_chan_map;
218 pv->out_map = &hb_qt_chan_map;
220 if ( hb_need_downmix( w->audio->config.in.channel_layout,
221 w->audio->config.out.mixdown) )
223 pv->downmix = hb_downmix_init(w->audio->config.in.channel_layout,
224 w->audio->config.out.mixdown);
// the downmixer takes smpte-ordered input and emits channels in out_map order
225 hb_downmix_set_chan_map( pv->downmix, &hb_smpte_chan_map, pv->out_map );
232 /***********************************************************************
234 ***********************************************************************
236 **********************************************************************/
// Tear down a decoder work object: log decode statistics, then release the
// scaler context, parser, codec context, buffer list, decode buffer and
// downmix state, and finally clear w->private_data.
237 static void decavcodecClose( hb_work_object_t * w )
239 hb_work_private_t * pv = w->private_data;
// statistics are only logged when a job is attached and a codec was opened
243 if ( pv->job && pv->context && pv->context->codec )
245 hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
246 pv->context->codec->name, pv->nframes, pv->decode_errors,
249 if ( pv->sws_context )
251 sws_freeContext( pv->sws_context );
255 av_parser_close(pv->parser);
// only close the codec context if a codec was actually opened on it
257 if ( pv->context && pv->context->codec )
259 hb_avcodec_close( pv->context );
263 hb_list_close( &pv->list );
267 av_free( pv->buffer );
272 hb_downmix_close( &(pv->downmix) );
274 if ( pv->downmix_buffer )
276 free( pv->downmix_buffer );
277 pv->downmix_buffer = NULL;
280 w->private_data = NULL;
284 /***********************************************************************
286 ***********************************************************************
288 **********************************************************************/
// Audio work routine. Walks the input buffer, running it through the parser
// when there is one, and hands each parsed frame to decodeAudio. Whatever
// ended up on pv->list is linked together and returned through *buf_out.
289 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
290 hb_buffer_t ** buf_out )
292 hb_work_private_t * pv = w->private_data;
293 hb_buffer_t * in = *buf_in;
297 /* EOF on input stream - send it downstream & say that we're done */
305 if ( in->start < -1 && pv->pts_next <= 0 )
307 // discard buffers that start before video time 0
311 // if the packet has a timestamp use it
312 if ( in->start != -1 )
314 pv->pts_next = in->start;
// each pass consumes len bytes of input
318 for ( pos = 0; pos < in->size; pos += len )
320 uint8_t *parser_output_buffer;
321 int parser_output_buffer_len;
// the expected next pts is passed to the parser as both pts and dts
322 int64_t cur = pv->pts_next;
324 if ( pv->parser != NULL )
326 len = av_parser_parse2( pv->parser, pv->context,
327 &parser_output_buffer, &parser_output_buffer_len,
328 in->data + pos, in->size - pos, cur, cur, AV_NOPTS_VALUE );
// no parser: treat the whole input buffer as a single frame
332 parser_output_buffer = in->data;
333 len = parser_output_buffer_len = in->size;
335 if (parser_output_buffer_len)
337 // set the duration on every frame since the stream format can
338 // change (it shouldn't but there's no way to guarantee it).
339 // duration is a scaling factor to go from #bytes in the decoded
340 // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
341 // total samples to per-channel samples. 'sample_rate' converts
342 // per-channel samples to seconds per sample and the 90000
343 // is mpeg ticks per second.
// both values must be non-zero, which also keeps the division below safe
344 if ( pv->context->sample_rate && pv->context->channels )
346 pv->duration = 90000. /
347 (double)( pv->context->sample_rate * pv->context->channels );
349 decodeAudio( w->audio, pv, parser_output_buffer, parser_output_buffer_len );
352 *buf_out = link_buf_list( pv );
// Report stream parameters from the codec context. *info is zeroed first;
// when a context exists, bitrate, time base (as rate / rate_base), profile
// and level are copied from it.
356 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
358 hb_work_private_t *pv = w->private_data;
360 memset( info, 0, sizeof(*info) );
362 if ( pv && pv->context )
364 AVCodecContext *context = pv->context;
365 info->bitrate = context->bit_rate;
366 info->rate = context->time_base.num;
367 info->rate_base = context->time_base.den;
368 info->profile = context->profile;
369 info->level = context->level;
// Determine audio stream parameters from raw bitstream data. If the work
// object already has a codec context this defers to decavcodecInfo.
// Otherwise a temporary parser and codec context are opened and the data in
// buf is parsed and decoded; once a decode succeeds with a known sample rate,
// the bitrate, sample rate and channel layout are stored in *info. The
// temporary parser and context are closed before returning.
375 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
376 hb_work_info_t *info )
378 hb_work_private_t *pv = w->private_data;
381 memset( info, 0, sizeof(*info) );
383 if ( pv && pv->context )
385 return decavcodecInfo( w, info );
388 // We should parse the bitstream to find its parameters but for right
389 // now we just return dummy values if there's a codec that will handle it.
390 AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
394 // there's no ffmpeg codec for this audio type - give up
// static so the pointer stored in info->name outlives this call; the
// buffer is shared by every caller. The last byte is never written, so
// the copy stays NUL-terminated.
398 static char codec_name[64];
399 info->name = strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
401 AVCodecParserContext *parser = av_parser_init( codec->id );
402 AVCodecContext *context = avcodec_alloc_context();
403 hb_avcodec_open( context, codec );
// scratch output buffer for the trial decode
404 uint8_t *buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
405 int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
406 unsigned char *pbuffer;
407 int pos, pbuffer_size;
409 while ( buf && !ret )
412 while ( pos < buf->size )
// the buffer's start time is passed to the parser as both pts and dts
418 len = av_parser_parse2( parser, context, &pbuffer,
419 &pbuffer_size, buf->data + pos,
420 buf->size - pos, buf->start,
421 buf->start, AV_NOPTS_VALUE );
426 len = pbuffer_size = buf->size;
429 if ( pbuffer_size > 0 )
432 av_init_packet( &avp );
434 avp.size = pbuffer_size;
436 len = avcodec_decode_audio3( context, (int16_t*)buffer,
// a positive return plus a known sample rate means the context now
// describes the stream
438 if ( len > 0 && context->sample_rate > 0 )
440 info->bitrate = context->bit_rate;
441 info->rate = context->sample_rate;
443 info->channel_layout =
444 hb_ff_layout_xlat(context->channel_layout,
455 if ( parser != NULL )
456 av_parser_close( parser );
457 hb_avcodec_close( context );
461 /* -------------------------------------------------------------
462 * General purpose video decoder using libavcodec
// Copy h rows of one image plane from src (row stride sstride) to dst (row
// stride dstride). With equal strides the plane is copied in one memcpy and
// the address just past the copied data is returned; otherwise rows are
// copied individually using the smaller of the two strides as the row length.
465 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
468 if ( dstride == sstride )
470 memcpy( dst, src, dstride * h );
471 return dst + dstride * h;
// never read or write more than the shorter of the two row sizes
473 int lbytes = dstride <= sstride? dstride : sstride;
476 memcpy( dst, src, lbytes );
483 // copy one video frame into an HB buf. If the frame isn't in our color space
484 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
485 // Otherwise just copy the bits.
// Returns a newly allocated HB video buffer holding the picture in 'frame'.
// The output size is either the codec context's dimensions with the low bit
// dropped or the job title's dimensions.
486 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
488 AVCodecContext *context = pv->context;
492 // if the dimensions are odd, drop the lsb since h264 requires that
493 // both width and height be even.
494 w = ( context->width >> 1 ) << 1;
495 h = ( context->height >> 1 ) << 1;
499 w = pv->job->title->width;
500 h = pv->job->title->height;
502 hb_buffer_t *buf = hb_video_buffer_init( w, h );
503 uint8_t *dst = buf->data;
// anything other than planar YUV 4:2:0 at exactly the output size goes
// through the scaler
505 if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
506 h != context->height )
508 // have to convert to our internal color space and/or rescale
510 avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
// the scaler context is created on first use and kept in pv for later frames
512 if ( ! pv->sws_context )
514 pv->sws_context = hb_sws_get_context( context->width, context->height, context->pix_fmt,
515 w, h, PIX_FMT_YUV420P,
516 SWS_LANCZOS|SWS_ACCURATE_RND);
// NOTE(review): the fifth argument of sws_scale is the height of the
// source slice, but h is the destination height, which can differ
// from context->height in this branch. Confirm whether
// context->height is what should be passed here.
518 sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
519 dstpic.data, dstpic.linesize );
// same format and size: copy the luma plane, then the two chroma
// planes at half width and half height (rounded up)
523 dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
524 w = (w + 1) >> 1; h = (h + 1) >> 1;
525 dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
526 dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
// get_buffer callback installed on the codec context. Stamps the frame with
// the pts saved in pv->pts, then lets ffmpeg's default allocator do the work.
// context->opaque must point at our hb_work_private_t.
531 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
533 hb_work_private_t *pv = context->opaque;
534 frame->pts = pv->pts;
536 return avcodec_default_get_buffer( context, frame );
// reget_buffer callback installed on the codec context. Same pts stamping as
// get_frame_buf, delegating to ffmpeg's default reget routine.
539 static int reget_frame_buf( AVCodecContext *context, AVFrame *frame )
541 hb_work_private_t *pv = context->opaque;
542 frame->pts = pv->pts;
544 return avcodec_default_reget_buffer( context, frame );
// Log that chapter chap_num starts at the current frame count and time pts.
// One message form includes the chapter's title, the other only its number.
547 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
// chapter numbers are 1-based, list indices 0-based
554 c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
557 hb_log( "%s: \"%s\" (%d) at frame %u time %"PRId64,
558 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
562 hb_log( "%s: Chapter %d at frame %u time %"PRId64,
563 pv->context->codec->name, chap_num, pv->nframes, pts );
// Drain the timestamp-reordering delay queue: starting at the oldest slot,
// give each queued frame a timestamp popped from the heap and move it to
// pv->list. Stops at the first empty slot.
567 static void flushDelayQueue( hb_work_private_t *pv )
// HEAP_SIZE is a power of two, so masking the frame count gives the slot
570 int slot = pv->nframes & (HEAP_SIZE-1);
572 // flush all the video packets left on our timestamp-reordering delay q
573 while ( ( buf = pv->delayq[slot] ) != NULL )
575 buf->start = heap_pop( &pv->pts_heap );
576 hb_list_add( pv->list, buf );
577 pv->delayq[slot] = NULL;
578 slot = ( slot + 1 ) & (HEAP_SIZE-1);
582 #define TOP_FIRST PIC_FLAG_TOP_FIELD_FIRST
583 #define PROGRESSIVE PIC_FLAG_PROGRESSIVE_FRAME
584 #define REPEAT_FIRST PIC_FLAG_REPEAT_FIRST_FIELD
// Record the field pattern of the newest frame in a 12-entry history
// (cadence[0] is the newest, cadence[11] the oldest) and log when the
// pattern switches between video and film. 'flags' holds the PIC_FLAG_* bits
// of the frame; 'start' is its time in 90KHz ticks and is logged in seconds.
592 static void checkCadence( int * cadence, uint16_t flags, int64_t start )
594 /* Rotate the cadence tracking. */
596 for(i=11; i > 0; i--)
598 cadence[i] = cadence[i-1];
601 if ( !(flags & PROGRESSIVE) && !(flags & TOP_FIRST) )
603 /* Not progressive, not top first...
604 That means it's probably bottom
605 first, 2 fields displayed.
607 //hb_log("MPEG2 Flag: Bottom field first, 2 fields displayed.");
610 else if ( !(flags & PROGRESSIVE) && (flags & TOP_FIRST) )
612 /* Not progressive, top is first,
613 Two fields displayed.
615 //hb_log("MPEG2 Flag: Top field first, 2 fields displayed.");
618 else if ( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) )
620 /* Progressive, but nothing else.
621 That means Bottom first,
624 //hb_log("MPEG2 Flag: Progressive. Bottom field first, 2 fields displayed.");
625 cadence[0] = BT_PROG;
627 else if ( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) )
629 /* Progressive, and repeat.
630 That means Bottom first,
633 //hb_log("MPEG2 Flag: Progressive repeat. Bottom field first, 3 fields displayed.");
634 cadence[0] = BTB_PROG;
636 else if ( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) )
638 /* Progressive, top first.
639 That means top first,
642 //hb_log("MPEG2 Flag: Progressive. Top field first, 2 fields displayed.");
643 cadence[0] = TB_PROG;
645 else if ( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) )
647 /* Progressive, top, repeat.
648 That means top first,
651 //hb_log("MPEG2 Flag: Progressive repeat. Top field first, 3 fields displayed.");
652 cadence[0] = TBT_PROG;
// A transition is only reported once the history is full (cadence[11] set).
// Values above TB are the progressive patterns assigned above.
655 if ( (cadence[2] <= TB) && (cadence[1] <= TB) && (cadence[0] > TB) && (cadence[11]) )
656 hb_log("%fs: Video -> Film", (float)start / 90000);
657 if ( (cadence[2] > TB) && (cadence[1] <= TB) && (cadence[0] <= TB) && (cadence[11]) )
658 hb_log("%fs: Film -> Video", (float)start / 90000);
662 * Decodes a video frame from the specified raw packet data ('data', 'size', 'sequence').
663 * The output of this function is stored in 'pv->list', which contains a list
664 * of zero or more decoded packets.
666 * The returned packets are guaranteed to have their timestamps in the correct order,
667 * even if the original packets decoded by libavcodec have misordered timestamps,
668 * due to the use of 'packed B-frames'.
670 * Internally the set of decoded packets may be buffered in 'pv->delayq'
671 * until enough packets have been decoded so that the timestamps can be
672 * correctly rewritten, if this is necessary.
// Decode one parsed video packet. A decoded picture gets a timestamp and
// PIC_FLAG_* flags, is copied into an HB buffer and is either appended
// straight to pv->list or routed through the reordering delay queue.
674 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size, int sequence )
676 int got_picture, oldlevel = 0;
// silence ffmpeg's own logging around the decode call unless verbosity is raised
680 if ( global_verbosity_level <= 1 )
682 oldlevel = av_log_get_level();
683 av_log_set_level( AV_LOG_QUIET );
686 av_init_packet( &avp );
689 if ( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 )
// restore the log level saved above
693 if ( global_verbosity_level <= 1 )
695 av_log_set_level( oldlevel );
701 // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
702 // packet had a pts we handed it to av_parser_parse (if the packet had
703 // no pts we set it to -1 but before the parse we can't distinguish between
704 // the start of a video frame with no pts & an intermediate packet of
705 // some frame which never has a pts). we hope that when parse returns
706 // the frame to us the pts we originally handed it will be in parser->pts.
707 // we put this pts into pv->pts so that when a avcodec_decode_video
708 // finally gets around to allocating an AVFrame to hold the decoded
709 // frame we can stuff that pts into the frame. if all of these relays
710 // worked at this point frame.pts should hold the frame's pts from the
711 // original data stream or -1 if it didn't have one. in the latter case
712 // we generate the next pts in sequence for it.
713 double frame_dur = pv->duration;
// no duration known yet: derive it from the codec time base, in 90KHz
// ticks, and remember it
714 if ( frame_dur <= 0 )
716 frame_dur = 90000. * (double)pv->context->time_base.num /
717 (double)pv->context->time_base.den;
718 pv->duration = frame_dur;
720 if ( pv->context->ticks_per_frame > 1 )
// lengthen this frame's duration by repeat_pict base durations
724 if ( frame.repeat_pict )
726 frame_dur += frame.repeat_pict * pv->duration;
728 // XXX Unlike every other video decoder, the Raw decoder doesn't
729 // use the standard buffer allocation routines so we never
730 // get to put a PTS in the frame. Do it now.
731 if ( pv->context->codec_id == CODEC_ID_RAWVIDEO )
736 // If there was no pts for this frame, assume constant frame rate
737 // video & estimate the next frame time from the last & duration.
738 double pts = frame.pts;
743 pv->pts_next = pts + frame_dur;
// translate ffmpeg's per-frame field info into HB picture flags
745 if ( frame.top_field_first )
747 flags |= PIC_FLAG_TOP_FIELD_FIRST;
749 if ( !frame.interlaced_frame )
751 flags |= PIC_FLAG_PROGRESSIVE_FRAME;
753 if ( frame.repeat_pict )
755 flags |= PIC_FLAG_REPEAT_FIRST_FIELD;
760 // if we're doing a scan or this content couldn't have been broken
761 // by Microsoft we don't worry about timestamp reordering
762 if ( ! pv->job || ! pv->brokenByMicrosoft )
764 buf = copy_frame( pv, &frame );
766 buf->sequence = sequence;
// attach a pending chapter mark to the first frame at or after its time
768 if ( pv->new_chap && buf->start >= pv->chap_time )
770 buf->new_chap = pv->new_chap;
773 log_chapter( pv, buf->new_chap, buf->start );
// very first frame of a job: log the starting chapter
775 else if ( pv->nframes == 0 && pv->job )
777 log_chapter( pv, pv->job->chapter_start, buf->start );
779 checkCadence( pv->cadence, buf->flags, buf->start );
780 hb_list_add( pv->list, buf );
785 // XXX This following probably addresses a libavcodec bug but I don't
786 // see an easy fix so we workaround it here.
788 // The M$ 'packed B-frames' atrocity results in decoded frames with
789 // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
790 // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
791 // The frames are actually delivered in the right order but with
792 // the wrong timestamp. To get the correct timestamp attached to
793 // each frame we have a delay queue (longer than the max number of
794 // b-frames) & a sorting heap for the timestamps. As each frame
795 // comes out of the decoder the oldest frame in the queue is removed
796 // and associated with the smallest timestamp. Then the new frame is
797 // added to the queue & its timestamp is pushed on the heap.
798 // This does nothing if the timestamps are correct (i.e., the video
799 // uses a codec that Micro$oft hasn't broken yet) but the frames
800 // get timestamped correctly even when M$ has munged them.
802 // remove the oldest picture from the frame queue (if any) &
803 // give it the smallest timestamp from our heap. The queue size
804 // is a power of two so we get the slot of the oldest by masking
805 // the frame count & this will become the slot of the newest
806 // once we've removed & processed the oldest.
807 int slot = pv->nframes & (HEAP_SIZE-1);
808 if ( ( buf = pv->delayq[slot] ) != NULL )
810 buf->start = heap_pop( &pv->pts_heap );
// same chapter-mark handling as the direct path above
812 if ( pv->new_chap && buf->start >= pv->chap_time )
814 buf->new_chap = pv->new_chap;
817 log_chapter( pv, buf->new_chap, buf->start );
819 else if ( pv->nframes == 0 && pv->job )
821 log_chapter( pv, pv->job->chapter_start, buf->start );
823 checkCadence( pv->cadence, buf->flags, buf->start );
824 hb_list_add( pv->list, buf );
827 // add the new frame to the delayq & push its timestamp on the heap
828 buf = copy_frame( pv, &frame );
829 buf->sequence = sequence;
831 pv->delayq[slot] = buf;
832 heap_push( &pv->pts_heap, pts );
// Run 'size' bytes of video data through the parser and decode every frame
// it produces. The pts and dts are passed to the parser, and the parser's
// pts is copied to pv->pts before each frame is decoded. The end-of-stream
// path also flushes the decoder and the reordering delay queue.
840 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size, int sequence,
841 int64_t pts, int64_t dts )
844 * The following loop is a do..while because we need to handle both
845 * data & the flush at the end (signaled by size=0). At the end there's
846 * generally a frame in the parser & one or more frames in the decoder
847 * (depending on the bframes setting).
853 int len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len,
854 data + pos, size - pos, pts, dts, AV_NOPTS_VALUE );
// hand the parser's pts to get_frame_buf / reget_frame_buf via pv->pts
859 pv->pts = pv->parser->pts;
860 decodeFrame( pv, pout, pout_len, sequence );
862 } while ( pos < size );
864 /* the stuff above flushed the parser, now flush the decoder */
// keep decoding with an empty packet while decodeFrame returns non-zero
867 while ( decodeFrame( pv, NULL, 0, sequence ) )
870 flushDelayQueue( pv );
875 * Removes all packets from 'pv->list', links them together into
876 * a linked-list, and returns the first packet in the list.
// Drains pv->list from the front: 'head' is the first buffer taken off,
// 'last' tracks the tail of the chain being built, and the loop ends once
// the list is empty.
878 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
880 hb_buffer_t *head = hb_list_item( pv->list, 0 );
884 hb_list_rem( pv->list, head );
886 hb_buffer_t *last = head, *buf;
888 while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
890 hb_list_rem( pv->list, buf );
// Initialize the video decoder work object: allocate the zero-filled private
// state, create the output list, parser and a video codec context, and
// install the buffer callbacks that stamp frames with a pts. No codec is
// opened on the context here.
899 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
902 hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
903 w->private_data = pv;
905 pv->list = hb_list_init();
// codec_param carries the ffmpeg codec id for this work object
907 int codec_id = w->codec_param;
908 pv->parser = av_parser_init( codec_id );
909 pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
911 /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
// the callbacks recover pv through context->opaque
912 pv->context->opaque = pv;
913 pv->context->get_buffer = get_frame_buf;
914 pv->context->reget_buffer = reget_frame_buf;
// Scan in->data forward from 'offset' looking for an MPEG start code prefix
// (bytes 00 00 01).
919 static int next_hdr( hb_buffer_t *in, int offset )
921 uint8_t *dat = in->data;
// last2 holds the previous two bytes; starting at 0xffff prevents a match
// before two real bytes have been seen
922 uint16_t last2 = 0xffff;
923 for ( ; in->size - offset > 1; ++offset )
// two zero bytes followed by 0x01
925 if ( last2 == 0 && dat[offset] == 0x01 )
926 // found an mpeg start code
929 last2 = ( last2 << 8 ) | dat[offset];
// Search the buffer from 'offset' for a start code whose fourth byte (the
// byte after the 00 00 01 prefix) equals hdr_type, using next_hdr to step
// from one start code to the next.
935 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
937 if ( in->size - offset < 4 )
938 // not enough room for an mpeg start code
// next_hdr signals "none found" with a negative value, which ends the loop
941 for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
943 if ( in->data[offset+3] == hdr_type )
// Prepare context->extradata before the codec is opened. For anything other
// than VC1 a small placeholder is allocated. For VC1 the sequence header
// (start code type 0x0f) and entry point header (0x0e) are located in the
// first buffer and copied back to back into extradata, followed by 8 zero bytes.
// decavcodecvWork treats a non-zero return as "headers not found yet".
950 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
952 hb_work_private_t *pv = w->private_data;
954 // we can't call the avstream funcs but the read_header func in the
955 // AVInputFormat may set up some state in the AVContext. In particular
956 // vc1t_read_header allocates 'extradata' to deal with header issues
957 // related to Microsoft's bizarre engineering notions. We alloc a chunk
958 // of space to make vc1 work then associate the codec with the context.
959 if ( w->codec_param != CODEC_ID_VC1 )
961 // we haven't been inflicted with M$ - allocate a little space as
962 // a marker and return success.
// a non-zero extradata_size also tells decavcodecvWork that setup is done
963 pv->context->extradata_size = 16;
964 pv->context->extradata = av_malloc(pv->context->extradata_size);
968 // find the start and end of the sequence header
970 if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
972 // didn't find start of seq hdr
// the header ends where the next start code begins
975 if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
981 // find the start and end of the entry point header
983 if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
985 // didn't find start of entry point hdr
988 if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
994 // found both headers - allocate an extradata big enough to hold both
995 // then copy them into it.
// NOTE(review): shdr_end and ehdr_end are used as byte counts below.
// Confirm they hold header lengths, not buffer offsets, at this point.
996 pv->context->extradata_size = shdr_end + ehdr_end;
// 8 extra bytes are allocated and zeroed after the two headers
997 pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
998 memcpy( pv->context->extradata, in->data + shdr, shdr_end );
999 memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
1000 memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
// Video work routine. An empty input buffer means end of stream: pending
// frames are flushed, the empty buffer is appended and HB_WORK_DONE is
// returned. Otherwise the codec is opened on the first buffer (after
// extradata setup), the buffer is decoded and closed, and the decoded
// frames are returned through *buf_out.
1004 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1005 hb_buffer_t ** buf_out )
1007 hb_work_private_t *pv = w->private_data;
1008 hb_buffer_t *in = *buf_in;
1009 int64_t pts = AV_NOPTS_VALUE;
1014 /* if we got an empty buffer signaling end-of-stream send it downstream */
1015 if ( in->size == 0 )
// only flush if a codec was actually opened
1017 if ( pv->context->codec != NULL )
1019 decodeVideo( pv, in->data, in->size, in->sequence, pts, dts );
// the empty buffer goes last so it follows the flushed frames
1021 hb_list_add( pv->list, in );
1022 *buf_out = link_buf_list( pv );
1023 return HB_WORK_DONE;
1026 // if this is the first frame open the codec (we have to wait for the
1027 // first frame because of M$ VC1 braindamage).
// extradata_size == 0 marks "codec not set up yet"
1028 if ( pv->context->extradata_size == 0 )
1030 if ( setup_extradata( w, in ) )
1032 // we didn't find the headers needed to set up extradata.
1033 // the codec will abort if we open it so just free the buf
1034 // and hope we eventually get the info we need.
1035 hb_buffer_close( &in );
1038 AVCodec *codec = avcodec_find_decoder( w->codec_param );
1039 // There's a mis-feature in ffmpeg that causes the context to be
1040 // incorrectly initialized the 1st time avcodec_open is called.
1041 // If you close it and open a 2nd time, it finishes the job.
1042 hb_avcodec_open( pv->context, codec );
1043 hb_avcodec_close( pv->context );
1044 hb_avcodec_open( pv->context, codec );
// negative start times are treated as "no timestamp"
1047 if( in->start >= 0 )
1050 dts = in->renderOffset;
1054 pv->new_chap = in->new_chap;
// without a usable pts the chapter is timed at the next expected pts
1055 pv->chap_time = pts >= 0? pts : pv->pts_next;
1057 decodeVideo( pv, in->data, in->size, in->sequence, pts, dts );
1058 hb_buffer_close( &in );
1059 *buf_out = link_buf_list( pv );
// Report video stream parameters from the codec context: bitrate, frame
// size, frame rate (in 27MHz clock units), pixel aspect ratio, display
// aspect ratio, profile, level and codec name. *info is zeroed first.
1063 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
1065 hb_work_private_t *pv = w->private_data;
1067 memset( info, 0, sizeof(*info) );
1069 if ( pv && pv->context )
1071 AVCodecContext *context = pv->context;
1072 info->bitrate = context->bit_rate;
1073 info->width = context->width;
1074 info->height = context->height;
1076 /* ffmpeg gives the frame rate in frames per second while HB wants
1077 * it in units of the 27MHz MPEG clock. */
1078 info->rate = 27000000;
// NOTE(review): integer division by time_base.den with no zero check
// visible here - confirm den cannot be 0 when this is called.
1079 info->rate_base = (int64_t)context->time_base.num * 27000000LL /
1080 context->time_base.den;
1081 if ( context->ticks_per_frame > 1 )
1083 // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
1084 // field rate rather than frame rate so convert back to frames.
1085 info->rate_base *= context->ticks_per_frame;
1088 info->pixel_aspect_width = context->sample_aspect_ratio.num;
1089 info->pixel_aspect_height = context->sample_aspect_ratio.den;
1091 /* Sometimes there's no pixel aspect set in the source ffmpeg context
1092 * which appears to come from the video stream. In that case,
1093 * try the pixel aspect in AVStream (which appears to come from
1094 * the container). Else assume a 1:1 PAR. */
1095 if ( info->pixel_aspect_width == 0 ||
1096 info->pixel_aspect_height == 0 )
1098 // There will not be an ffmpeg stream if the file is TS
1099 AVStream *st = hb_ffmpeg_avstream( w->codec_param );
// a missing stream or a zero component falls back to 1
1100 info->pixel_aspect_width = st && st->sample_aspect_ratio.num ?
1101 st->sample_aspect_ratio.num : 1;
1102 info->pixel_aspect_height = st && st->sample_aspect_ratio.den ?
1103 st->sample_aspect_ratio.den : 1;
1105 /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
1106 * Display Aspect Ratio so we convert by scaling by the Storage
1107 * Aspect Ratio (w/h). We do the calc in floating point to get the
1108 * rounding right. */
1109 info->aspect = (double)info->pixel_aspect_width *
1110 (double)context->width /
1111 (double)info->pixel_aspect_height /
1112 (double)context->height;
1114 info->profile = context->profile;
1115 info->level = context->level;
// NOTE(review): context->codec is dereferenced without a NULL check, but
// decavcodecvWork only opens the codec on the first buffer. Confirm this
// is never called before then.
1116 info->name = context->codec->name;
1122 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
1123 hb_work_info_t *info )
// Work object descriptor for the video decoder; the string below is its display name.
1128 hb_work_object_t hb_decavcodecv =
1131 "Video decoder (libavcodec)",
1140 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
1141 // includes a parser and passes information from the parser to the decoder
1142 // via a codec context kept in the AVStream of the reader's AVFormatContext.
1143 // We *have* to use that codec context to decode the stream or we'll get
1144 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
1145 // codec context in our codec_param.
1147 // this routine gets the appropriate context pointer from the ffmpeg
1148 // stream reader. it can't be called until we get the first buffer because
1149 // we can't guarantee that reader will be called before our init
1150 // routine and if our init is called first we'll get a pointer to the
1151 // old scan stream (which has already been closed).
// Bind this work object to the codec context owned by the ffmpeg stream
// reader (looked up via w->codec_param), reopen the codec if it was closed,
// estimate the frame duration in 90KHz ticks from the stream, install the
// pts-stamping buffer callbacks and enable timestamp reordering.
1152 static void init_ffmpeg_context( hb_work_object_t *w )
1154 hb_work_private_t *pv = w->private_data;
1155 pv->context = hb_ffmpeg_context( w->codec_param );
1157 // during scan the decoder gets closed & reopened which will
1158 // close the codec so reopen it if it's not there
1159 if ( ! pv->context->codec )
1161 AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
1162 hb_avcodec_open( pv->context, codec );
1164 // set up our best guess at the frame duration.
1165 // the frame rate in the codec is usually bogus but it's sometimes
1166 // ok in the stream.
1167 AVStream *st = hb_ffmpeg_avstream( w->codec_param );
1169 if ( st->nb_frames && st->duration )
1171 // compute the average frame duration from the total number
1172 // of frames & the total duration.
// duration is in time_base units; the result is seconds per frame
1173 pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
1174 ( (double)st->nb_frames * (double)st->time_base.den );
1178 // XXX We don't have a frame count or duration so try to use the
1179 // far less reliable time base info in the stream.
1180 // Because the time bases are so screwed up, we only take values
1181 // in the range 8fps - 64fps.
// candidates are tried in order: average frame rate, stream time base,
// r_frame_rate, then a fixed default
1183 if ( st->avg_frame_rate.den * 64L > st->avg_frame_rate.num &&
1184 st->avg_frame_rate.num > st->avg_frame_rate.den * 8L )
// a frame rate is inverted to get a per-frame time base
1186 tb.num = st->avg_frame_rate.den;
1187 tb.den = st->avg_frame_rate.num;
1189 else if ( st->time_base.num * 64L > st->time_base.den &&
1190 st->time_base.den > st->time_base.num * 8L )
1194 else if ( st->r_frame_rate.den * 64L > st->r_frame_rate.num &&
1195 st->r_frame_rate.num > st->r_frame_rate.den * 8L )
1197 tb.num = st->r_frame_rate.den;
1198 tb.den = st->r_frame_rate.num;
// nothing plausible found: fall back to 1001/24000 s per frame (about 23.976 fps)
1202 tb.num = 1001; /*XXX*/
1203 tb.den = 24000; /*XXX*/
1205 pv->duration = (double)tb.num / (double)tb.den;
// convert seconds per frame to 90KHz ticks
1207 pv->duration *= 90000.;
1209 // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
1210 pv->context->opaque = pv;
1211 pv->context->get_buffer = get_frame_buf;
1212 pv->context->reget_buffer = reget_frame_buf;
1214 // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1215 // b-frames abortion that messes up frame ordering and timestamps.
1216 // XXX ffmpeg knows which streams are broken but doesn't expose the
1217 // info externally. We should patch ffmpeg to add a flag to the
1218 // codec context for this but until then we mark all ffmpeg streams
1220 pv->brokenByMicrosoft = 1;
// prepare_ffmpeg_buffer: make sure the FF_INPUT_BUFFER_PADDING_SIZE bytes
// that follow the in->size payload bytes of 'in' exist and are zero, growing
// the buffer with hb_buffer_realloc() when in->alloc is too small.
// This function does not assign in->size; only the padding area is written.
1223 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1225 // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1226 // will seg fault in odd, data dependent ways if it's not there. (my guess
1227 // is this is a case of a local performance optimization creating a global
1228 // performance degradation since all the time wasted by extraneous data
1229 // copies & memory zeroing has to be huge compared to the minor reduction
1230 // in inner-loop instructions this affords - modern cpus bottleneck on
1231 // memory bandwidth not instruction bandwidth).
// Note: the code pads by FF_INPUT_BUFFER_PADDING_SIZE, not by a literal 8,
// so the amount actually zeroed is whatever that macro is defined as.
1232 if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1234 // have to realloc to add the padding
1235 hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
// NOTE(review): the memset relies on hb_buffer_realloc() having made
// in->data large enough; no failure check is visible here.
1237 memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
// decavcodecviInit: allocate the zero-initialised private data for work
// object 'w', attach it as w->private_data and create its buffer list.
// When the work object has an audio track (w->audio != NULL) it also picks
// the output channel map and creates a downmixer if hb_need_downmix() says
// the input layout / output mixdown combination requires one.
// NOTE(review): some lines of this function (including its return
// statement) are not shown in this excerpt.
1240 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
// NOTE(review): the calloc() result is dereferenced below without a NULL
// check in the lines visible here.
1243 hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1244 w->private_data = pv;
1246 pv->list = hb_list_init();
// Audio-only setup; skipped when the work object has no audio track.
1250 if ( w->audio != NULL )
1252 if ( w->audio->config.out.codec == HB_ACODEC_AC3 )
1254 // ffmpeg's audio encoder expects an smpte chan map as input.
1255 // So we need to map the decoder's output to smpte.
1256 pv->out_map = &hb_smpte_chan_map;
// Presumably the non-AC3 case (the 'else' line itself is not shown here):
// use the QuickTime channel map.
1260 pv->out_map = &hb_qt_chan_map;
1262 if ( hb_need_downmix( w->audio->config.in.channel_layout,
1263 w->audio->config.out.mixdown) )
1265 pv->downmix = hb_downmix_init(w->audio->config.in.channel_layout,
1266 w->audio->config.out.mixdown);
// The downmixer is told its input is in SMPTE order and its output should
// be in the map chosen above.
1267 hb_downmix_set_chan_map( pv->downmix, &hb_smpte_chan_map, pv->out_map );
// decavcodecviWork: work routine of the ffmpeg-stream video decoder; handles
// the buffer passed in *buf_in.
//   - An empty buffer (size == 0) means end of stream: the decoder is
//     drained, the delay queue flushed, the empty buffer appended to
//     pv->list, the linked list handed back through *buf_out and
//     HB_WORK_DONE returned.
//   - Otherwise the codec context is set up on first use, the buffer is
//     padded and decoded, the input buffer is closed and whatever is queued
//     on pv->list is handed back through *buf_out.
// NOTE(review): several lines of this function (timestamp bookkeeping after
// 'pts' is read, the condition around the chapter fields and the final
// return) are not shown in this excerpt.
1274 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1275 hb_buffer_t ** buf_out )
1277 hb_work_private_t *pv = w->private_data;
1278 hb_buffer_t *in = *buf_in;
1281 /* if we got an empty buffer signaling end-of-stream send it downstream */
1282 if ( in->size == 0 )
1284 /* flush any frames left in the decoder */
// Call decodeFrame() with no input until it returns 0; skipped entirely
// when no context was ever created.
1285 while ( pv->context && decodeFrame( pv, NULL, 0, in->sequence ) )
1288 flushDelayQueue( pv );
1289 hb_list_add( pv->list, in );
1290 *buf_out = link_buf_list( pv );
1291 return HB_WORK_DONE;
// Lazy initialisation: see the comment on init_ffmpeg_context() for why
// this cannot be done in the init routine.
1294 if ( ! pv->context )
1296 init_ffmpeg_context( w );
1299 int64_t pts = in->start;
1302 // use the first timestamp as our 'next expected' pts
1303 if ( pv->pts_next < 0 )
// Record the chapter mark; its time is the buffer's pts when it has one
// (>= 0), otherwise the next expected pts.
1312 pv->new_chap = in->new_chap;
1313 pv->chap_time = pts >= 0? pts : pv->pts_next;
1315 prepare_ffmpeg_buffer( in );
1316 decodeFrame( pv, in->data, in->size, in->sequence );
// The input buffer is released here, right after it has been decoded.
1317 hb_buffer_close( &in );
1318 *buf_out = link_buf_list( pv );
// decavcodecviInfo: info routine of the ffmpeg-stream decoder. Delegates to
// decavcodecvInfo() and, when that call returns non-zero, overrides the
// frame rate fields of 'info' using the frame duration estimated by
// init_ffmpeg_context() (pv->duration).
// NOTE(review): the return statements are not shown in this excerpt.
1322 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1324 if ( decavcodecvInfo( w, info ) )
1326 hb_work_private_t *pv = w->private_data;
1327 if ( ! pv->context )
1329 init_ffmpeg_context( w );
1331 // we have the frame duration in units of the 90KHz pts clock but
1332 // need it in units of the 27MHz MPEG clock.
// 27000000 / 90000 = 300, hence the scale factor below.
1333 info->rate = 27000000;
1334 info->rate_base = pv->duration * 300.;
// decodeAudio: decode the 'size' bytes at 'data' with pv->context and append
// the resulting float sample buffers to pv->list.
//
// Each loop iteration hands the remainder of the input (data + pos,
// size - pos) to avcodec_decode_audio3(), which decodes into pv->buffer
// (allocated on first use, AVCODEC_MAX_AUDIO_FRAME_SIZE bytes). Output that
// is not already SAMPLE_FMT_S16 is converted to 16-bit samples in a
// temporary buffer. Two output paths are visible: one copies the samples
// into pv->downmix_buffer and runs hb_downmix(); the other converts them to
// float and remaps the channel order with hb_layout_remap().
//
// NOTE(review): many lines are not shown in this excerpt - the declarations
// of 'pos', 'avp', 'nsamples', 'buf' and 'i', the handling of 'len'
// (including how 'pos' advances), the condition selecting between the two
// output paths, the stores of the timestamps and the av_free() of the
// temporary buffer. Confirm against the full function before relying on
// this summary.
1340 static void decodeAudio( hb_audio_t * audio, hb_work_private_t *pv, uint8_t *data, int size )
1342 AVCodecContext *context = pv->context;
// Countdown tested further down; presumably guards against looping forever
// when the decoder makes no progress - confirm (the surrounding condition is
// not shown here).
1344 int loop_limit = 256;
1346 while ( pos < size )
1348 int16_t *buffer = pv->buffer;
// Allocate the decode buffer once and keep it in pv for later calls.
// NOTE(review): the av_malloc() result is not checked in the visible lines.
1349 if ( buffer == NULL )
1351 pv->buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1352 buffer = pv->buffer;
// Point the packet at the unconsumed tail of the input.
1356 av_init_packet( &avp );
1357 avp.data = data + pos;
1358 avp.size = size - pos;
// out_size is passed by address; it is read back below as the number of
// decoded bytes (out_size / isamp, out_size / 2).
1360 int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1362 int len = avcodec_decode_audio3( context, buffer, &out_size, &avp );
1369 if ( !(loop_limit--) )
1378 // We require signed 16-bit ints for the output format. If
1379 // we got something different convert it.
1380 if ( context->sample_fmt != SAMPLE_FMT_S16 )
1382 // Note: av_audio_convert seems to be a work-in-progress but
1383 // looks like it will eventually handle general audio
1384 // mixdowns which would allow us much more flexibility
1385 // in handling multichannel audio in HB. If we were doing
1386 // anything more complicated than a one-for-one format
1387 // conversion we'd probably want to cache the converter
1388 // context in the pv.
// isamp = size in bytes of one sample in the decoder's native format.
// NOTE(review): isamp is used as a divisor below; a format reporting fewer
// than 8 bits per sample would make it 0.
1389 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1390 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1391 context->sample_fmt, 1,
1393 // get output buffer size (in 2-byte samples) then malloc a buffer
1394 nsamples = out_size / isamp;
// From here on 'buffer' is a temporary 16-bit buffer, distinct from
// pv->buffer (which still holds the native-format samples).
1395 buffer = av_malloc( nsamples * 2 );
1397 // we're doing straight sample format conversion which behaves as if
1398 // there were only one channel.
1399 const void * const ibuf[6] = { pv->buffer };
1400 void * const obuf[6] = { buffer };
1401 const int istride[6] = { isamp };
1402 const int ostride[6] = { 2 };
1404 av_audio_convert( ctx, obuf, ostride, ibuf, istride, nsamples );
1405 av_audio_convert_free( ctx );
// Already 16-bit: out_size bytes hold out_size / 2 samples.
1409 nsamples = out_size / 2;
// --- downmix path ---
// NOTE(review): assigning realloc()'s result straight back loses the old
// block if realloc fails, and the copy loop below would then write through
// a NULL pointer.
1416 pv->downmix_buffer = realloc(pv->downmix_buffer, nsamples * sizeof(hb_sample_t));
// Copy the 16-bit samples element by element into the hb_sample_t buffer.
1419 for( i = 0; i < nsamples; ++i )
1421 pv->downmix_buffer[i] = buffer[i];
// nsamples counts samples across all channels; divide to get samples per
// channel. NOTE(review): context->channels is used as a divisor unchecked.
1424 int n_ch_samples = nsamples / context->channels;
1425 int channels = HB_AMIXDOWN_GET_DISCRETE_CHANNEL_COUNT(audio->config.out.mixdown);
// NOTE(review): the buffer is sized with sizeof(float) but written through
// an hb_sample_t pointer; this relies on hb_sample_t being no larger than
// float - confirm.
1427 buf = hb_buffer_init( n_ch_samples * channels * sizeof(float) );
1428 hb_sample_t *samples = (hb_sample_t *)buf->data;
1429 hb_downmix(pv->downmix, samples, pv->downmix_buffer, n_ch_samples);
// --- pass-through path: same channel count, only the order may change ---
1433 buf = hb_buffer_init( nsamples * sizeof(float) );
1434 float *fl32 = (float *)buf->data;
// Plain int16 -> float value conversion; no rescaling is applied here.
1436 for( i = 0; i < nsamples; ++i )
1438 fl32[i] = buffer[i];
1440 int n_ch_samples = nsamples / context->channels;
// Reorder channels in place from SMPTE order to the map chosen at init.
1441 hb_layout_remap( &hb_smpte_chan_map, pv->out_map,
1442 audio->config.in.channel_layout,
1443 fl32, n_ch_samples );
// Advance the running timestamp by the length of this block. For audio,
// pv->duration is set in decavcodecaiWork() to
// 90000 / (sample_rate * channels), so nsamples * pv->duration is the block
// length in 90KHz ticks.
1446 double pts = pv->pts_next;
1448 pts += nsamples * pv->duration;
1452 hb_list_add( pv->list, buf );
1454 // if we allocated a buffer for sample format conversion, free it
1455 if ( buffer != pv->buffer )
// decavcodecaiWork: work routine of the ffmpeg-stream audio decoder; handles
// the buffer passed in *buf_in.
//   - size <= 0 marks end of stream: HB_WORK_DONE is returned.
//   - a buffer whose start is below -1 while pv->pts_next <= 0 is discarded.
//   - otherwise the codec context is set up on first use, pts_next is
//     resynchronised to the packet timestamp when needed, and the buffer is
//     padded and decoded; the decoded buffers are handed back via *buf_out.
// NOTE(review): the statements inside the end-of-stream and discard branches
// and the final return are not shown in this excerpt.
1463 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1464 hb_buffer_t **buf_out )
1466 if ( (*buf_in)->size <= 0 )
1468 /* EOF on input stream - send it downstream & say that we're done */
1471 return HB_WORK_DONE;
1474 hb_work_private_t *pv = w->private_data;
// The test is 'start < -1', not '< 0'; presumably -1 means "no timestamp"
// (compare the '>= 0' tests below) and must not be discarded - confirm.
1476 if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1478 // discard buffers that start before video time 0
1483 if ( ! pv->context )
1485 init_ffmpeg_context( w );
1486 // duration is a scaling factor to go from #bytes in the decoded
1487 // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1488 // total samples to per-channel samples. 'sample_rate' converts
1489 // per-channel samples to seconds per sample and the 90000
1490 // is mpeg ticks per second.
// Note: decodeAudio() multiplies this factor by its 16-bit sample count
// (out_size / 2 or out_size / isamp), not by a byte count. This assignment
// replaces the video-style frame duration that init_ffmpeg_context() just
// stored in pv->duration.
1491 pv->duration = 90000. /
1492 (double)( pv->context->sample_rate * pv->context->channels );
1494 hb_buffer_t *in = *buf_in;
1496 // if the packet has a timestamp use it if we don't have a timestamp yet
1497 // or if there's been a timing discontinuity of more than 100ms.
// 90*100 = 9000 ticks of the 90KHz clock = 100ms. Only a forward gap
// (packet later than expected) triggers the resync; a packet that is
// earlier than pts_next does not.
1498 if ( in->start >= 0 &&
1499 ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1501 pv->pts_next = in->start;
1503 prepare_ffmpeg_buffer( in );
1504 decodeAudio( w->audio, pv, in->data, in->size );
1505 *buf_out = link_buf_list( pv );
// Work object for the "internal" video decoder that pairs with the ffmpeg
// stream reader (the "decavcodecvi" object described in the comment at the
// top of this file).
// NOTE(review): only the description string is visible in this excerpt; the
// remaining initializers are on lines not shown here.
1510 hb_work_object_t hb_decavcodecvi =
1513 "Video decoder (ffmpeg streams)",
1521 hb_work_object_t hb_decavcodecai =
1524 "Audio decoder (ffmpeg streams)",