1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
/* Forward declarations of the three entry points of the x264 encoder work
   object (initialise, process buffers, close); all are defined below. */
13 int encx264Init( hb_work_object_t *, hb_job_t * );
14 int encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
15 void encx264Close( hb_work_object_t * );
/* Work-object descriptor for this encoder. Only the descriptive name string
   is visible in this listing; the remaining initializer fields are not shown. */
17 hb_work_object_t hb_encx264 =
20 "H.264/AVC encoder (libx264)",
// NOTE(review): DTS_BUFFER_SIZE is not referenced anywhere in the visible part
// of this file; its purpose cannot be confirmed from here.
26 #define DTS_BUFFER_SIZE 32
29 * The frame info struct remembers information about each frame across calls
30 * to x264_encoder_encode. Since frames are uniquely identified by their
31 * timestamp, we use some bits of the timestamp as an index. The LSB is
32 * chosen so that two successive frames will have different values in the
33 * bits over any plausible range of frame rates. (Starting with bit 8 allows
34 * any frame rate slower than 352fps.) The MSB determines the size of the array.
35 * It is chosen so that two frames can't use the same slot during the
36 * encoder's max frame delay (set by the standard as 16 frames) and so
37 * that, up to some minimum frame rate, frames are guaranteed to map to
38 * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
39 * no collisions down to a rate of .7 fps).
// Bit range of the frame timestamp that is used as the frame_info[] index.
// FRAME_INFO_MAX2 is the lowest bit used (it bounds the maximum frame rate)
// and FRAME_INFO_MIN2 is the highest bit used (it bounds the minimum frame rate).
41 #define FRAME_INFO_MAX2 (8) // 2^8 = 256; 90000/256 = 352 frames/sec
42 #define FRAME_INFO_MIN2 (17) // 2^17 = 128K; 90000/131072 = 0.69 frames/sec
43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1)) // 2^(17-8+1) = 1024 slots
44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1) // index mask; works because FRAME_INFO_SIZE is a power of two
// Private state of one encoder instance (reached through w->private_data).
// NOTE: several members used elsewhere in this file (job, x264, filename,
// init_delay, next_chap, frames_in, frames_out and the members of the
// frame_info element) are not visible in this listing.
46 struct hb_work_private_s
50 x264_picture_t pic_in; // input picture passed to x264_encoder_encode; its planes are re-pointed at each input buffer
51 uint8_t *x264_allocated_pic; // plane[0] as allocated by x264_picture_alloc, restored before x264_picture_clean
55 uint32_t frames_split; // number of frames we had to split
56 int chap_mark; // saved chap mark when we're propagating it
57 int64_t last_stop; // Debugging - stop time of previous input frame
63 } frame_info[FRAME_INFO_SIZE]; // per-frame slots indexed by timestamp bits (see save_frame_info / get_frame_duration)
68 /***********************************************************************
69 * hb_work_encx264_init
70 ***********************************************************************
 * Builds the libx264 parameter set from the job (library defaults, the
 * x264opts option string, frame size and rate, keyframe interval, level,
 * VUI colour/aspect information and rate control), opens the encoder,
 * copies the SPS/PPS headers into w->config->h264, allocates the input
 * picture and computes the initial B-frame reordering delay.
 *
 * w   - work object; receives SPS/PPS and init_delay through w->config
 * job - job settings that drive the encoder configuration
 *
 * NOTE: this listing omits some source lines (see the gaps in the embedded
 * line numbers), including the return statement, so the return value is
 * not documented here.
72 **********************************************************************/
73 int encx264Init( hb_work_object_t * w, hb_job_t * job )
79 hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
// pv->filename is the path later used for the x264 stats file (psz_stat_out / psz_stat_in).
84 memset( pv->filename, 0, 1024 );
85 hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
87 x264_param_default( &param );
89 /* Temporarily default mbtree to off for baseline,
90 overridable through x264 option strings. */
// First scan of the option string: only looks for "bframes" so that mbtree
// can be switched off before the remaining defaults are applied.
// NOTE(review): the copy made by strdup here has no visible matching free in
// this first scan (the second scan below frees its copy) - confirm.
91 if( job->x264opts != NULL && *job->x264opts != '\0' )
93 char *x264opts, *x264opts_start;
95 x264opts = x264opts_start = strdup(job->x264opts);
97 while( x264opts_start && *x264opts )
99 char *name = x264opts;
102 x264opts += strcspn( x264opts, ":" );
109 value = strchr( name, '=' );
117 When B-frames are enabled, the max frame count increments
118 by 1 (regardless of the number of B-frames). If you don't
119 change the duration of the video track when you mux, libmp4
120 barfs. So, check if the x264opts aren't using B-frames, and
121 when they aren't, set the boolean job->areBframes as false.
123 if( !( strcmp( name, "bframes" ) ) )
// strchr returns NULL when the option has no '=' part; never pass NULL to atoi.
125 if( value && atoi( value ) == 0 )
127 param.rc.b_mb_tree = 0;
134 param.analyse.b_psnr = 1;
135 param.analyse.b_ssim = 1;
137 param.i_threads = ( hb_get_cpu_count() * 3 / 2 );
138 param.i_width = job->width;
139 param.i_height = job->height;
140 param.i_fps_num = job->vrate;
141 param.i_fps_den = job->vrate_base;
143 /* Disable annexb. Inserts size into nal header instead of start code */
146 /* Set min:max key intervals ratio to 1:10 of fps.
147 * This section is skipped if fps=25 (default).
149 if (job->vrate_base != 1080000)
151 if (job->pass == 2 && !job->cfr )
153 /* Even though the framerate might be different due to VFR,
154 we still want the same keyframe intervals as the 1st pass,
155 so the 1st pass stats won't conflict on frame decisions. */
156 hb_interjob_t * interjob = hb_interjob_get( job->h );
157 param.i_keyint_min = ( interjob->vrate / interjob->vrate_base ) + 1;
158 param.i_keyint_max = ( 10 * interjob->vrate / interjob->vrate_base ) + 1;
162 int fps = job->vrate / job->vrate_base;
164 /* adjust +1 when fps has remainder to bump
165 { 23.976, 29.97, 59.94 } to { 24, 30, 60 } */
166 if (job->vrate % job->vrate_base)
169 param.i_keyint_min = fps;
170 param.i_keyint_max = fps * 10;
173 hb_log("encx264: keyint-min: %i, keyint-max: %i", param.i_keyint_min, param.i_keyint_max);
176 param.i_log_level = X264_LOG_INFO;
177 if( job->h264_level )
180 param.i_level_idc = job->h264_level;
181 hb_log( "encx264: encoding at level %i",
185 /* B-frames are on by default.*/
189 This section passes the string x264opts to libx264 for parsing into
190 parameter names and values.
192 The string is set up like this:
193 option1=value1:option2=value 2
195 So, you have to iterate through based on the colons, and then put
196 the left side of the equals sign in "name" and the right side into
197 "value." Then you hand those strings off to x264 for interpretation.
199 This is all based on the universal x264 option handling Loren
200 Merritt implemented in the Mplayer/Mencoder project.
203 if( job->x264opts != NULL && *job->x264opts != '\0' )
205 char *x264opts, *x264opts_start;
207 x264opts = x264opts_start = strdup(job->x264opts);
209 while( x264opts_start && *x264opts )
211 char *name = x264opts;
215 x264opts += strcspn( x264opts, ":" );
222 value = strchr( name, '=' );
230 When B-frames are enabled, the max frame count increments
231 by 1 (regardless of the number of B-frames). If you don't
232 change the duration of the video track when you mux, libmp4
233 barfs. So, check if the x264opts aren't using B-frames, and
234 when they aren't, set the boolean job->areBframes as false.
236 if( !( strcmp( name, "bframes" ) ) )
// strchr returns NULL when the option has no '=' part; never pass NULL to atoi.
238 if( value && atoi( value ) == 0 )
244 /* Note b-pyramid here, so the initial delay can be doubled */
245 if( !( strcmp( name, "b-pyramid" ) ) )
249 if( atoi( value ) > 0 )
260 /* Here's where the strings are passed to libx264 for parsing. */
261 ret = x264_param_parse( &param, name, value );
263 /* Let x264 sanity check the options for us*/
264 if( ret == X264_PARAM_BAD_NAME )
265 hb_log( "x264 options: Unknown suboption %s", name );
266 if( ret == X264_PARAM_BAD_VALUE )
267 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
269 free(x264opts_start);
272 /* set up the VUI color model & gamma to match what the COLR atom
273 * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
274 if( job->color_matrix == 1 )
276 // ITU BT.601 DVD or SD TV content
277 param.vui.i_colorprim = 6;
278 param.vui.i_transfer = 1;
279 param.vui.i_colmatrix = 6;
281 else if( job->color_matrix == 2 )
283 // ITU BT.709 HD content
284 param.vui.i_colorprim = 1;
285 param.vui.i_transfer = 1;
286 param.vui.i_colmatrix = 1;
288 else if ( job->title->width >= 1280 || job->title->height >= 720 )
290 // we guess that 720p or above is ITU BT.709 HD content
291 param.vui.i_colorprim = 1;
292 param.vui.i_transfer = 1;
293 param.vui.i_colmatrix = 1;
297 // ITU BT.601 DVD or SD TV content
298 param.vui.i_colorprim = 6;
299 param.vui.i_transfer = 1;
300 param.vui.i_colmatrix = 6;
303 if( job->anamorphic.mode )
305 param.vui.i_sar_width = job->anamorphic.par_width;
306 param.vui.i_sar_height = job->anamorphic.par_height;
308 hb_log( "encx264: encoding with stored aspect %d/%d",
309 param.vui.i_sar_width, param.vui.i_sar_height );
// A vquality strictly between 0 and 1 is treated as a fraction and mapped onto
// the 0..51 quantizer scale (51 - vquality * 51). The condition that selects
// CRF versus CQP is not visible in this listing.
313 if( job->vquality > 0.0 && job->vquality < 1.0 )
319 param.rc.i_rc_method = X264_RC_CRF;
320 param.rc.f_rf_constant = 51 - job->vquality * 51;
321 hb_log( "encx264: Encoding at constant RF %f",
322 param.rc.f_rf_constant );
327 param.rc.i_rc_method = X264_RC_CQP;
328 param.rc.i_qp_constant = 51 - job->vquality * 51;
329 hb_log( "encx264: encoding at constant QP %d",
330 param.rc.i_qp_constant );
334 else if( job->vquality == 0 || job->vquality >= 1.0 )
336 /* Use the vquality as a raw RF or QP
337 instead of treating it like a percentage. */
342 param.rc.i_rc_method = X264_RC_CRF;
343 param.rc.f_rf_constant = job->vquality;
344 hb_log( "encx264: Encoding at constant RF %f",
345 param.rc.f_rf_constant );
350 param.rc.i_rc_method = X264_RC_CQP;
351 param.rc.i_qp_constant = job->vquality;
352 hb_log( "encx264: encoding at constant QP %d",
353 param.rc.i_qp_constant );
// Otherwise: average-bitrate encoding, with the stats file written or read.
// NOTE(review): presumably written on pass 1 and read on pass 2; the pass
// checks are not visible in this listing - confirm.
360 param.rc.i_rc_method = X264_RC_ABR;
361 param.rc.i_bitrate = job->vbitrate;
365 param.rc.b_stat_write = 1;
366 param.rc.psz_stat_out = pv->filename;
369 param.rc.b_stat_read = 1;
370 param.rc.psz_stat_in = pv->filename;
375 hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
376 pv->x264 = x264_encoder_open( &param );
378 x264_encoder_headers( pv->x264, &nal, &nal_count );
// The "+ 4" / "- 4" below skip the first four bytes of each header payload
// (presumably the size prefix written because annexb is disabled).
380 /* Sequence Parameter Set */
381 memcpy(w->config->h264.sps, nal[1].p_payload + 4, nal[1].i_payload - 4);
382 w->config->h264.sps_length = nal[1].i_payload - 4;
384 /* Picture Parameter Set */
385 memcpy(w->config->h264.pps, nal[2].p_payload + 4, nal[2].i_payload - 4);
386 w->config->h264.pps_length = nal[2].i_payload - 4;
388 x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
389 job->width, job->height );
// Chroma strides are half the luma width, rounded up. plane[0] is remembered
// so that encx264Close can hand the original allocation back to x264.
391 pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
392 pv->x264_allocated_pic = pv->pic_in.img.plane[0];
396 /* Basic initDelay value is the clockrate divided by the FPS
397 -- the length of one frame in clockticks. */
398 pv->init_delay = 90000. / ((double)job->vrate / (double)job->vrate_base);
400 /* 23.976-length frames are 3753.75 ticks long on average but the DVD
401 creates that average rate by repeating 59.94 fields so the max
402 frame size is actually 4504.5 (3 field times). The field durations
403 are computed based on quantized times (see below) so we need an extra
404 two ticks to account for the rounding. */
405 if (pv->init_delay == 3753)
406 pv->init_delay = 4507;
408 /* frame rates are not exact in the DVD 90KHz PTS clock (they are
409 exact in the DVD 27MHz system clock but we never see that) so the
410 rates computed above are all +-1 due to quantization. Worst case
411 is when a clock-rounded-down frame is adjacent to a rounded-up frame
412 which makes one of the frames 2 ticks longer than the nominal
416 /* For VFR, libhb sees the FPS as 29.97, but the longest frames
417 will use the duration of frames running at 23.976fps instead.
418 Since detelecine occasionally makes mistakes and since we have
419 to deal with some really horrible timing jitter from mkvs and
420 mp4s encoded with low resolution clocks, make the delay very
421 conservative if we're not doing CFR. */
427 /* The delay is 1 frame for regular b-frames, 2 for b-pyramid. */
428 pv->init_delay *= job->areBframes;
430 w->config->h264.init_delay = pv->init_delay;
/* Shuts down one encoder instance: logs split-frame statistics when any
   frame was split, puts x264's own plane[0] pointer back into pic_in so
   that x264_picture_clean releases the buffer x264 allocated, closes the
   encoder and clears w->private_data.
   w - work object whose private_data holds the encoder state. */
435 void encx264Close( hb_work_object_t * w )
437 hb_work_private_t * pv = w->private_data;
// Report only when at least one input frame had to be split.
439 if ( pv->frames_split )
441 hb_log( "encx264: %u frames had to be split (%u in, %u out)",
442 pv->frames_split, pv->frames_in, pv->frames_out );
445 * Patch the x264 allocated data back in so that x264 can free it
446 * we have been using our own buffers during the encode to avoid copying.
448 pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
449 x264_picture_clean( &pv->pic_in );
450 x264_encoder_close( pv->x264 );
452 w->private_data = NULL;
458 * see comments in definition of 'frame_info' in pv struct for description
459 * of what these routines are doing.
/* Stores the duration of input buffer 'in' (stop - start) in the frame_info
   slot selected by bits FRAME_INFO_MAX2 and up of in->start, so that it can
   be looked up again by timestamp once the frame comes out of the encoder. */
461 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
// Slot index = timestamp shifted down by FRAME_INFO_MAX2 bits, wrapped to the table size.
463 int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
464 pv->frame_info[i].duration = in->stop - in->start;
/* Returns the duration stored by save_frame_info for the frame whose
   timestamp is 'pts', using the same timestamp-to-slot mapping. The slot is
   not checked for having been written for this exact pts. */
467 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
469 int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
470 return pv->frame_info[i].duration;
/* Packs the NAL units returned by one x264_encoder_encode call into a newly
   allocated hb_buffer_t. The buffer gets the output picture's PTS as start,
   PTS plus the saved frame duration as stop, and a frame type derived from
   the picture type and each NAL's reference priority. A buffer that ends up
   with no data (buf->size <= 0) is closed again.
   w       - work object (private data supplies the job and frame_info)
   pic_out - picture description filled in by x264
   i_nal   - number of entries in nal
   nal     - NAL units produced by the encoder
   NOTE: the return statements are not visible in this listing. */
473 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
474 int i_nal, x264_nal_t *nal )
476 hb_buffer_t *buf = NULL;
477 hb_work_private_t *pv = w->private_data;
478 hb_job_t *job = pv->job;
480 /* Should be way too large */
481 buf = hb_video_buffer_init( job->width, job->height );
485 // use the pts to get the original frame's duration.
486 int64_t duration = get_frame_duration( pv, pic_out->i_pts );
487 buf->start = pic_out->i_pts;
488 buf->stop = pic_out->i_pts + duration;
490 /* Encode all the NALs we were given into buf.
491 NOTE: This code assumes one video frame per NAL (but there can
492 be other stuff like SPS and/or PPS). If there are multiple
493 frames we only get the duration of the first which will
494 eventually screw up the muxer & decoder. */
496 for( i = 0; i < i_nal; i++ )
498 int size = nal[i].i_payload;
// Append this NAL's payload at the current end of the buffer.
499 memcpy(buf->data + buf->size, nal[i].p_payload, size);
// AVI output: a NAL with the highest reference priority marks a key frame.
505 if( job->mux & HB_MUX_AVI )
507 if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
509 buf->frametype = HB_FRAME_KEY;
515 /* H.264 in .mp4 or .mkv */
516 switch( nal[i].i_type )
518 /* Sequence Parameter Set & Picture Parameter Set go in the
519 * mp4 header so skip them here
532 /* Decide what type of frame we have. */
533 switch( pic_out->i_type )
536 buf->frametype = HB_FRAME_IDR;
537 /* if we have a chapter marker pending and this
538 frame's presentation time stamp is at or after
539 the marker's time stamp, use this as the
541 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
544 buf->new_chap = pv->chap_mark;
549 buf->frametype = HB_FRAME_I;
553 buf->frametype = HB_FRAME_P;
557 buf->frametype = HB_FRAME_B;
560 /* This is for b-pyramid, which has reference b-frames
561 However, it doesn't seem to ever be used... */
563 buf->frametype = HB_FRAME_BREF;
566 // If it isn't the above, what type of frame is it??
572 /* Since libx264 doesn't tell us when b-frames are
573 themselves reference frames, figure it out on our own. */
574 if( (buf->frametype == HB_FRAME_B) &&
575 (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
576 buf->frametype = HB_FRAME_BREF;
578 /* Expose disposable bit to muxer. */
579 if( nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE )
580 buf->flags &= ~HB_FRAME_REF;
582 buf->flags |= HB_FRAME_REF;
586 // make sure we found at least one video frame
587 if ( buf->size <= 0 )
589 // no video - discard the buf
590 hb_buffer_close( &buf );
/* Submits one input buffer to x264 and returns what nal_encode builds from
   the encoder's output. pic_in's planes are pointed straight at the buffer
   data (no copy), an IDR frame is requested when the buffer starts a chapter
   and chapter markers are enabled, the frame's duration is saved for lookup
   after reordering, and the buffer's start time is used as the PTS.
   w  - work object holding the encoder state
   in - input frame; assumed here to hold the Y plane followed by the U and V
        planes, as the pointer arithmetic below implies */
595 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
597 hb_work_private_t *pv = w->private_data;
598 hb_job_t *job = pv->job;
600 /* Point x264 at our current buffers Y(UV) data. */
601 pv->pic_in.img.plane[0] = in->data;
// Size of one chroma plane: half width by half height, both rounded up.
603 int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
// NOTE(review): the two memsets fill the chroma planes with the neutral value
// 0x80, presumably only for grayscale jobs; the guarding condition is not
// visible in this listing - confirm.
606 /* XXX x264 has currently no option for grayscale encoding */
607 memset( pv->pic_in.img.plane[1], 0x80, uvsize );
608 memset( pv->pic_in.img.plane[2], 0x80, uvsize );
612 /* Point x264 at our buffers (Y)UV data */
613 pv->pic_in.img.plane[1] = in->data + job->width * job->height;
614 pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
616 if( in->new_chap && job->chapter_markers )
618 /* chapters have to start with an IDR frame so request that this
619 frame be coded as IDR. Since there may be up to 16 frames
620 currently buffered in the encoder remember the timestamp so
621 when this frame finally pops out of the encoder we'll mark
622 its buffer as the start of a chapter. */
623 pv->pic_in.i_type = X264_TYPE_IDR;
// Only remember a new marker when none is already pending.
624 if( pv->next_chap == 0 )
626 pv->next_chap = in->start;
627 pv->chap_mark = in->new_chap;
629 /* don't let 'work_loop' put a chapter mark on the wrong buffer */
634 pv->pic_in.i_type = X264_TYPE_AUTO;
636 pv->pic_in.i_qpplus1 = 0;
638 /* XXX this is temporary debugging code to check that the upstream
639 * modules (render & sync) have generated a continuous, self-consistent
640 * frame stream with the current frame's start time equal to the
641 * previous frame's stop time.
643 if( pv->last_stop != in->start )
645 hb_log("encx264 input continuity err: last stop %"PRId64" start %"PRId64,
646 pv->last_stop, in->start);
648 pv->last_stop = in->stop;
650 // Remember info about this frame that we need to pass across
651 // the x264_encoder_encode call (since it reorders frames).
652 save_frame_info( pv, in );
654 /* Feed the input PTS to x264 so it can figure out proper output PTS */
655 pv->pic_in.i_pts = in->start;
657 x264_picture_t pic_out;
661 x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
664 return nal_encode( w, &pic_out, i_nal, nal );
/* Work callback of the encoder. On end of input it drains the frames still
   buffered inside x264 (calling x264_encoder_encode with a NULL picture) and
   chains the resulting buffers. Otherwise it encodes the input buffer; when
   B-frame reordering is active (init_delay != 0) and the frame lasts longer
   than init_delay, the frame is fed to the encoder several times with
   shorter, consecutive time ranges.
   w       - work object holding the encoder state
   buf_in  - in: pointer to the input buffer
   buf_out - out: receives the encoded buffer (or chain of buffers)
   NOTE: the EOF test, several assignments and the return statements are not
   visible in this listing. */
669 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
670 hb_buffer_t ** buf_out )
672 hb_work_private_t *pv = w->private_data;
673 hb_buffer_t *in = *buf_in;
679 // EOF on input. Flush any frames still in the encoder then
680 // send the eof downstream to tell the muxer we're done.
681 x264_picture_t pic_out;
684 hb_buffer_t *last_buf = NULL;
// A NULL input picture asks x264 for frames it is still holding back.
688 x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
692 hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
// Append buf to the output chain (the first buffer starts the chain).
696 if ( last_buf == NULL )
699 last_buf->next = buf;
703 // Flushed everything - add the eof to the end of the chain.
704 if ( last_buf == NULL )
713 // Not EOF - encode the packet & wrap it in a NAL
716 // if we're re-ordering frames, check if this frame is too large to reorder
717 if ( pv->init_delay && in->stop - in->start > pv->init_delay )
719 // This frame's duration is larger than the time allotted for b-frame
720 // reordering. That means that if it's used as a reference the decoder
721 // won't be able to move it early enough to render it in correct
722 // sequence & the playback will have odd jumps & twitches. To make
723 // sure this doesn't happen we pretend this frame is multiple
724 // frames, each with duration <= init_delay. Since each of these
725 // new frames contains the same image the visual effect is identical
726 // to the original but the resulting stream can now be coded without
727 // error. We take advantage of the fact that x264 buffers frame
728 // data internally to feed the same image into the encoder multiple
729 // times, just changing its start & stop times each time.
731 int64_t orig_stop = in->stop;
732 int64_t new_stop = in->start;
733 hb_buffer_t *last_buf = NULL;
735 // We want to spread the new frames uniformly over the total time
736 // so that we don't end up with a very short frame at the end.
737 // In the number of pieces calculation we add in init_delay-1 to
738 // round up but not add an extra piece if the frame duration is
739 // a multiple of init_delay. The final increment of frame_dur is
740 // to restore the bits that got truncated by the divide on the
741 // previous line. If we don't do this we end up with an extra tiny
742 // frame at the end whose duration is npieces-1.
743 int64_t frame_dur = orig_stop - new_stop;
744 int64_t npieces = ( frame_dur + pv->init_delay - 1 ) / pv->init_delay;
745 frame_dur /= npieces;
// Feed the same image once per piece until the original stop time is reached.
748 while ( in->start < orig_stop )
750 new_stop += frame_dur;
// Never let the last piece run past the original stop time.
751 if ( new_stop > orig_stop )
752 new_stop = orig_stop;
754 hb_buffer_t *buf = x264_encode( w, in );
758 if ( last_buf == NULL )
761 last_buf->next = buf;
// The next piece starts where this one ended.
764 in->start = new_stop;
// Normal case: the frame fits within the reordering delay, encode it once.
770 *buf_out = x264_encode( w, in );