1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
/* Forward declarations of the three functions defined later in this file. */
13 int encx264Init( hb_work_object_t *, hb_job_t * );
14 int encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
15 void encx264Close( hb_work_object_t * );
/* Work-object descriptor for this encoder. The string below is presumably
   its display name -- TODO confirm against the hb_work_object_t layout. */
17 hb_work_object_t hb_encx264 =
20 "H.264/AVC encoder (libx264)",
/* NOTE(review): DTS_BUFFER_SIZE is not referenced in the visible part of
   this file -- confirm whether it is still needed. */
26 #define DTS_BUFFER_SIZE 32
29 * The frame info struct remembers information about each frame across calls
30 * to x264_encoder_encode. Since frames are uniquely identified by their
31 * timestamp, we use some bits of the timestamp as an index. The LSB is
32 * chosen so that two successive frames will have different values in the
33 * bits over any plausible range of frame rates. (Starting with bit 8 allows
34 * any frame rate slower than 352fps.) The MSB determines the size of the array.
35 * It is chosen so that two frames can't use the same slot during the
36 * encoder's max frame delay (set by the standard as 16 frames) and so
37 * that, up to some minimum frame rate, frames are guaranteed to map to
38 * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
39 * no collisions down to a rate of .7 fps).
/* Bit positions, within the 90000 Hz timestamp, that bound the slot index.
   Despite the names, FRAME_INFO_MAX2 is the low bit (it bounds the maximum
   frame rate) and FRAME_INFO_MIN2 is the high bit (it bounds the minimum
   frame rate). FRAME_INFO_SIZE is the slot count and FRAME_INFO_MASK
   reduces a shifted timestamp to a valid slot index. */
41 #define FRAME_INFO_MAX2 (8) // 2^8 = 256; 90000/256 = 352 frames/sec
42 #define FRAME_INFO_MIN2 (17) // 2^17 = 128K; 90000/131072 = ~0.7 frames/sec
43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1)) // 2^(17-8+1) = 1024 slots
44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
/* Per-instance state of the x264 encoder work object. It is allocated in
   encx264Init and reached through w->private_data by the other routines. */
46 struct hb_work_private_s
// Input picture handed to x264_encoder_encode for every frame.
50 x264_picture_t pic_in;
// plane[0] pointer as allocated by x264_picture_alloc. It is saved in
// encx264Init and restored in encx264Close before x264_picture_clean.
51 uint8_t *x264_allocated_pic;
55 uint32_t frames_split; // number of frames we had to split
56 int chap_mark; // saved chap mark when we're propagating it
57 int64_t last_stop; // Debugging - stop time of previous input frame
// Table of per-frame data with FRAME_INFO_SIZE slots. The slot index is
// derived from timestamp bits in save_frame_info and get_frame_duration.
63 } frame_info[FRAME_INFO_SIZE];
68 /***********************************************************************
69 * encx264Init
70 ***********************************************************************
 * Allocates the private state with calloc, obtains a temporary file name
 * for the x264.log stats file, and fills in an x264 parameter set from the
 * job: thread count, picture size, frame rate, keyframe interval, level,
 * the user supplied x264opts string, VUI colour description, sample aspect
 * ratio and rate control. It then opens the encoder, stores the encoded
 * SPS and PPS in w->config->h264, allocates the input picture, and
 * computes the initial delay stored in w->config->h264.init_delay.
 *
 * w   - work object whose config receives the SPS, PPS and init_delay
 * job - job whose settings are read to configure the encoder
 *
 * NOTE(review): confirm the return value convention with the callers.
72 **********************************************************************/
73 int encx264Init( hb_work_object_t * w, hb_job_t * job )
80 hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
85 memset( pv->filename, 0, 1024 );
86 hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
// NOTE(review): the argument on the next line (and on the later
// x264_param_parse and x264_encoder_open calls) looks like a mis-encoded
// &param -- confirm and repair the encoding of this file.
88 x264_param_default( ¶m );
91 param.analyse.b_psnr = 1;
92 param.analyse.b_ssim = 1;
// One and a half threads per CPU reported by hb_get_cpu_count (integer math).
94 param.i_threads = ( hb_get_cpu_count() * 3 / 2 );
95 param.i_width = job->width;
96 param.i_height = job->height;
97 param.i_fps_num = job->vrate;
98 param.i_fps_den = job->vrate_base;
100 /* Set min:max key intervals ratio to 1:10 of fps.
101 * This section is skipped if fps=25 (default).
103 if (job->vrate_base != 1080000)
105 if (job->pass == 2 && !job->cfr )
107 /* Even though the framerate might be different due to VFR,
108 we still want the same keyframe intervals as the 1st pass,
109 so the 1st pass stats won't conflict on frame decisions. */
110 hb_interjob_t * interjob = hb_interjob_get( job->h );
111 param.i_keyint_min = ( interjob->vrate / interjob->vrate_base ) + 1;
112 param.i_keyint_max = ( 10 * interjob->vrate / interjob->vrate_base ) + 1;
116 int fps = job->vrate / job->vrate_base;
118 /* adjust +1 when fps has remainder to bump
119 { 23.976, 29.97, 59.94 } to { 24, 30, 60 } */
120 if (job->vrate % job->vrate_base)
123 param.i_keyint_min = fps;
124 param.i_keyint_max = fps * 10;
127 hb_log("encx264: keyint-min: %i, keyint-max: %i", param.i_keyint_min, param.i_keyint_max);
130 param.i_log_level = X264_LOG_INFO;
131 if( job->h264_level )
134 param.i_level_idc = job->h264_level;
135 hb_log( "encx264: encoding at level %i",
139 /* B-frames are on by default.*/
143 This section passes the string x264opts to libx264 for parsing into
144 parameter names and values.
146 The string is set up like this:
147 option1=value1:option2=value 2
149 So, you have to iterate through based on the colons, and then put
150 the left side of the equals sign in "name" and the right side into
151 "value." Then you hand those strings off to x264 for interpretation.
153 This is all based on the universal x264 option handling Loren
154 Merritt implemented in the Mplayer/Mencoder project.
157 if( job->x264opts != NULL && *job->x264opts != '\0' )
159 char *x264opts, *x264opts_start;
161 x264opts = x264opts_start = strdup(job->x264opts);
163 while( x264opts_start && *x264opts )
165 char *name = x264opts;
169 x264opts += strcspn( x264opts, ":" );
176 value = strchr( name, '=' );
184 When B-frames are enabled, the max frame count increments
185 by 1 (regardless of the number of B-frames). If you don't
186 change the duration of the video track when you mux, libmp4
187 barfs. So, check if the x264opts aren't using B-frames, and
188 when they aren't, set the boolean job->areBframes as false.
190 if( !( strcmp( name, "bframes" ) ) )
192 if( atoi( value ) == 0 )
198 /* Note b-pyramid here, so the initial delay can be doubled */
199 if( !( strcmp( name, "b-pyramid" ) ) )
203 if( atoi( value ) > 0 )
214 /* Here's where the strings are passed to libx264 for parsing. */
215 ret = x264_param_parse( ¶m, name, value );
217 /* Let x264 sanity check the options for us*/
218 if( ret == X264_PARAM_BAD_NAME )
219 hb_log( "x264 options: Unknown suboption %s", name );
220 if( ret == X264_PARAM_BAD_VALUE )
221 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
223 free(x264opts_start);
226 /* set up the VUI color model & gamma to match what the COLR atom
227 * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
228 if( job->color_matrix == 1 )
230 // ITU BT.601 DVD or SD TV content
231 param.vui.i_colorprim = 6;
232 param.vui.i_transfer = 1;
233 param.vui.i_colmatrix = 6;
235 else if( job->color_matrix == 2 )
237 // ITU BT.709 HD content
238 param.vui.i_colorprim = 1;
239 param.vui.i_transfer = 1;
240 param.vui.i_colmatrix = 1;
242 else if ( job->title->width >= 1280 || job->title->height >= 720 )
244 // we guess that 720p or above is ITU BT.709 HD content
245 param.vui.i_colorprim = 1;
246 param.vui.i_transfer = 1;
247 param.vui.i_colmatrix = 1;
251 // ITU BT.601 DVD or SD TV content
252 param.vui.i_colorprim = 6;
253 param.vui.i_transfer = 1;
254 param.vui.i_colmatrix = 6;
257 if( job->anamorphic.mode )
259 param.vui.i_sar_width = job->anamorphic.par_width;
260 param.vui.i_sar_height = job->anamorphic.par_height;
262 hb_log( "encx264: encoding with stored aspect %d/%d",
263 param.vui.i_sar_width, param.vui.i_sar_height );
// A vquality strictly between 0 and 1 is treated as a fraction and
// converted to an RF or QP value of 51 - vquality * 51.
267 if( job->vquality > 0.0 && job->vquality < 1.0 )
273 param.rc.i_rc_method = X264_RC_CRF;
274 param.rc.f_rf_constant = 51 - job->vquality * 51;
275 hb_log( "encx264: Encoding at constant RF %f",
276 param.rc.f_rf_constant );
281 param.rc.i_rc_method = X264_RC_CQP;
282 param.rc.i_qp_constant = 51 - job->vquality * 51;
283 hb_log( "encx264: encoding at constant QP %d",
284 param.rc.i_qp_constant );
288 else if( job->vquality == 0 || job->vquality >= 1.0 )
290 /* Use the vquality as a raw RF or QP
291 instead of treating it like a percentage. */
296 param.rc.i_rc_method = X264_RC_CRF;
297 param.rc.f_rf_constant = job->vquality;
298 hb_log( "encx264: Encoding at constant RF %f",
299 param.rc.f_rf_constant );
304 param.rc.i_rc_method = X264_RC_CQP;
305 param.rc.i_qp_constant = job->vquality;
306 hb_log( "encx264: encoding at constant QP %d",
307 param.rc.i_qp_constant );
314 param.rc.i_rc_method = X264_RC_ABR;
315 param.rc.i_bitrate = job->vbitrate;
319 param.rc.b_stat_write = 1;
320 param.rc.psz_stat_out = pv->filename;
323 param.rc.b_stat_read = 1;
324 param.rc.psz_stat_in = pv->filename;
329 hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
330 pv->x264 = x264_encoder_open( ¶m );
332 x264_encoder_headers( pv->x264, &nal, &nal_count );
334 /* Sequence Parameter Set */
335 x264_nal_encode( w->config->h264.sps, &nal_size, 0, &nal[1] );
336 w->config->h264.sps_length = nal_size;
338 /* Picture Parameter Set */
339 x264_nal_encode( w->config->h264.pps, &nal_size, 0, &nal[2] );
340 w->config->h264.pps_length = nal_size;
342 x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
343 job->width, job->height );
// Both chroma strides are set to half the job width, rounded up.
345 pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
346 pv->x264_allocated_pic = pv->pic_in.img.plane[0];
350 /* Basic initDelay value is the clockrate divided by the FPS
351 -- the length of one frame in clockticks. */
352 pv->init_delay = 90000. / ((double)job->vrate / (double)job->vrate_base);
354 /* 23.976-length frames are 3753.75 ticks long on average but the DVD
355 creates that average rate by repeating 59.94 fields so the max
356 frame size is actually 4504.5 (3 field times). The field durations
357 are computed based on quantized times (see below) so we need an extra
358 two ticks to account for the rounding. */
359 if (pv->init_delay == 3753)
360 pv->init_delay = 4507;
362 /* frame rates are not exact in the DVD 90KHz PTS clock (they are
363 exact in the DVD 27MHz system clock but we never see that) so the
364 rates computed above are all +-1 due to quantization. Worst case
365 is when a clock-rounded-down frame is adjacent to a rounded-up frame
366 which makes one of the frames 2 ticks longer than the nominal
370 /* For VFR, libhb sees the FPS as 29.97, but the longest frames
371 will use the duration of frames running at 23.976fps instead.
372 Since detelecine occasionally makes mistakes and since we have
373 to deal with some really horrible timing jitter from mkvs and
374 mp4s encoded with low resolution clocks, make the delay very
375 conservative if we're not doing CFR. */
381 /* The delay is 1 frame for regular b-frames, 2 for b-pyramid. */
382 pv->init_delay *= job->areBframes;
384 w->config->h264.init_delay = pv->init_delay;
/* Shut down the encoder attached to work object w.
   Logs the split-frame statistics when any frame was split, restores the
   plane[0] pointer that x264 originally allocated so x264_picture_clean
   releases the right memory, closes the x264 encoder and clears
   w->private_data. */
389 void encx264Close( hb_work_object_t * w )
391 hb_work_private_t * pv = w->private_data;
393 if ( pv->frames_split )
395 hb_log( "encx264: %u frames had to be split (%u in, %u out)",
396 pv->frames_split, pv->frames_in, pv->frames_out );
399 * Patch the x264 allocated data back in so that x264 can free it
400 * we have been using our own buffers during the encode to avoid copying.
402 pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
403 x264_picture_clean( &pv->pic_in );
404 x264_encoder_close( pv->x264 );
406 w->private_data = NULL;
412 * see comments in definition of 'frame_info' in pv struct for description
413 * of what these routines are doing.
/* Store the duration (stop - start) of input buffer in. The slot is chosen
   by shifting the start timestamp right by FRAME_INFO_MAX2 bits and masking
   with FRAME_INFO_MASK. Any value previously held in that slot is
   overwritten. */
415 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
417 int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
418 pv->frame_info[i].duration = in->stop - in->start;
/* Return the duration stored in the frame_info slot selected by pts, using
   the same shift and mask as save_frame_info. No check is made that the
   slot was last written for this exact pts. */
421 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
423 int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
424 return pv->frame_info[i].duration;
/* Pack the NAL units produced by one x264_encoder_encode call into a new
   hb_buffer_t.

   w       - work object; its private data supplies the job and chapter state
   pic_out - picture description returned by x264 (pts and frame type)
   i_nal   - number of entries in nal
   nal     - NAL units to encode

   The buffer start time is pic_out->i_pts and its stop time is that pts plus
   the duration saved for the frame. Each NAL is written with x264_nal_encode
   at the current end of the buffer, the frame type is derived from
   pic_out->i_type and the NAL ref_idc, and HB_FRAME_REF is set or cleared in
   buf->flags according to whether the NAL is disposable. When nothing was
   written (buf->size <= 0) the buffer is closed with hb_buffer_close. */
427 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
428 int i_nal, x264_nal_t *nal )
430 hb_buffer_t *buf = NULL;
431 hb_work_private_t *pv = w->private_data;
432 hb_job_t *job = pv->job;
434 /* Should be way too large */
435 buf = hb_video_buffer_init( job->width, job->height );
439 // use the pts to get the original frame's duration.
440 int64_t duration = get_frame_duration( pv, pic_out->i_pts );
441 buf->start = pic_out->i_pts;
442 buf->stop = pic_out->i_pts + duration;
444 /* Encode all the NALs we were given into buf.
445 NOTE: This code assumes one video frame per NAL (but there can
446 be other stuff like SPS and/or PPS). If there are multiple
447 frames we only get the duration of the first which will
448 eventually screw up the muxer & decoder. */
450 for( i = 0; i < i_nal; i++ )
452 int data = buf->alloc - buf->size;
453 int size = x264_nal_encode( buf->data + buf->size, &data, 1, &nal[i] );
459 if( job->mux & HB_MUX_AVI )
461 if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
463 buf->frametype = HB_FRAME_KEY;
469 /* H.264 in .mp4 or .mkv */
470 switch( nal[i].i_type )
472 /* Sequence Parameter Set & Picture Parameter Set go in the
473 * mp4 header so skip them here
486 /* H.264 in mp4 (stolen from mp4creator) */
// Overwrite the first four bytes of this NAL with (size - 4), most
// significant byte first.
487 buf->data[buf->size+0] = ( ( size - 4 ) >> 24 ) & 0xFF;
488 buf->data[buf->size+1] = ( ( size - 4 ) >> 16 ) & 0xFF;
489 buf->data[buf->size+2] = ( ( size - 4 ) >> 8 ) & 0xFF;
490 buf->data[buf->size+3] = ( ( size - 4 ) >> 0 ) & 0xFF;
492 /* Decide what type of frame we have. */
493 switch( pic_out->i_type )
496 buf->frametype = HB_FRAME_IDR;
497 /* if we have a chapter marker pending and this
498 frame's presentation time stamp is at or after
499 the marker's time stamp, use this as the
501 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
504 buf->new_chap = pv->chap_mark;
509 buf->frametype = HB_FRAME_I;
513 buf->frametype = HB_FRAME_P;
517 buf->frametype = HB_FRAME_B;
520 /* This is for b-pyramid, which has reference b-frames
521 However, it doesn't seem to ever be used... */
523 buf->frametype = HB_FRAME_BREF;
526 // If it isn't the above, what type of frame is it??
532 /* Since libx264 doesn't tell us when b-frames are
533 themselves reference frames, figure it out on our own. */
534 if( (buf->frametype == HB_FRAME_B) &&
535 (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
536 buf->frametype = HB_FRAME_BREF;
538 /* Expose disposable bit to muxer. */
539 if( nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE )
540 buf->flags &= ~HB_FRAME_REF;
542 buf->flags |= HB_FRAME_REF;
546 // make sure we found at least one video frame
547 if ( buf->size <= 0 )
549 // no video - discard the buf
550 hb_buffer_close( &buf );
/* Submit one input buffer to x264 and return whatever nal_encode builds from
   the output of that call.

   w  - work object holding the encoder state
   in - input frame; its data pointer is used directly as the picture planes
        (no copy), and its start time is passed to x264 as the pts

   Before encoding, the routine requests an IDR frame when the buffer starts
   a chapter and chapter markers are enabled, logs a continuity error when
   in->start differs from the previous stop time, and records the frame
   duration with save_frame_info so it can be recovered after reordering. */
555 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
557 hb_work_private_t *pv = w->private_data;
558 hb_job_t *job = pv->job;
560 /* Point x264 at our current buffers Y(UV) data. */
561 pv->pic_in.img.plane[0] = in->data;
// Size of one chroma plane: half the width times half the height, each
// rounded up.
563 int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
566 /* XXX x264 has currently no option for grayscale encoding */
567 memset( pv->pic_in.img.plane[1], 0x80, uvsize );
568 memset( pv->pic_in.img.plane[2], 0x80, uvsize );
572 /* Point x264 at our buffers (Y)UV data */
573 pv->pic_in.img.plane[1] = in->data + job->width * job->height;
574 pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
576 if( in->new_chap && job->chapter_markers )
578 /* chapters have to start with an IDR frame so request that this
579 frame be coded as IDR. Since there may be up to 16 frames
580 currently buffered in the encoder remember the timestamp so
581 when this frame finally pops out of the encoder we'll mark
582 its buffer as the start of a chapter. */
583 pv->pic_in.i_type = X264_TYPE_IDR;
584 if( pv->next_chap == 0 )
586 pv->next_chap = in->start;
587 pv->chap_mark = in->new_chap;
589 /* don't let 'work_loop' put a chapter mark on the wrong buffer */
594 pv->pic_in.i_type = X264_TYPE_AUTO;
596 pv->pic_in.i_qpplus1 = 0;
598 /* XXX this is temporary debugging code to check that the upstream
599 * modules (render & sync) have generated a continuous, self-consistent
600 * frame stream with the current frame's start time equal to the
601 * previous frame's stop time.
603 if( pv->last_stop != in->start )
605 hb_log("encx264 input continuity err: last stop %"PRId64" start %"PRId64,
606 pv->last_stop, in->start);
608 pv->last_stop = in->stop;
610 // Remember info about this frame that we need to pass across
611 // the x264_encoder_encode call (since it reorders frames).
612 save_frame_info( pv, in );
614 /* Feed the input PTS to x264 so it can figure out proper output PTS */
615 pv->pic_in.i_pts = in->start;
617 x264_picture_t pic_out;
621 x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
624 return nal_encode( w, &pic_out, i_nal, nal );
/* Work function of the encoder.

   w       - work object holding the encoder state
   buf_in  - address of the input buffer pointer
   buf_out - receives the encoded output buffer or chain of buffers

   On end of input the encoder is drained by calling x264_encoder_encode
   with a NULL picture, and the resulting buffers are linked through their
   next pointers. Otherwise the input frame is encoded with x264_encode.
   When init_delay is nonzero and the frame lasts longer than init_delay, the
   same image is fed to the encoder several times with successive start
   times until orig_stop is reached, and the outputs are chained. */
629 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
630 hb_buffer_t ** buf_out )
632 hb_work_private_t *pv = w->private_data;
633 hb_buffer_t *in = *buf_in;
639 // EOF on input. Flush any frames still in the encoder then
640 // send the eof downstream to tell the muxer we're done.
641 x264_picture_t pic_out;
644 hb_buffer_t *last_buf = NULL;
648 x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
652 hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
656 if ( last_buf == NULL )
659 last_buf->next = buf;
663 // Flushed everything - add the eof to the end of the chain.
664 if ( last_buf == NULL )
673 // Not EOF - encode the packet & wrap it in a NAL
676 // if we're re-ordering frames, check if this frame is too large to reorder
677 if ( pv->init_delay && in->stop - in->start > pv->init_delay )
679 // This frame's duration is larger than the time allotted for b-frame
680 // reordering. That means that if it's used as a reference the decoder
681 // won't be able to move it early enough to render it in correct
682 // sequence & the playback will have odd jumps & twitches. To make
683 // sure this doesn't happen we pretend this frame is multiple
684 // frames, each with duration <= init_delay. Since each of these
685 // new frames contains the same image the visual effect is identical
686 // to the original but the resulting stream can now be coded without
687 // error. We take advantage of the fact that x264 buffers frame
688 // data internally to feed the same image into the encoder multiple
689 // times, just changing its start & stop times each time.
691 int64_t orig_stop = in->stop;
692 int64_t new_stop = in->start;
693 hb_buffer_t *last_buf = NULL;
695 // We want to spread the new frames uniformly over the total time
696 // so that we don't end up with a very short frame at the end.
697 // In the number of pieces calculation we add in init_delay-1 to
698 // round up but not add an extra piece if the frame duration is
699 // a multiple of init_delay. The final increment of frame_dur is
700 // to restore the bits that got truncated by the divide on the
701 // previous line. If we don't do this we end up with an extra tiny
702 // frame at the end whose duration is npieces-1.
703 int64_t frame_dur = orig_stop - new_stop;
704 int64_t npieces = ( frame_dur + pv->init_delay - 1 ) / pv->init_delay;
705 frame_dur /= npieces;
708 while ( in->start < orig_stop )
710 new_stop += frame_dur;
711 if ( new_stop > orig_stop )
712 new_stop = orig_stop;
714 hb_buffer_t *buf = x264_encode( w, in );
718 if ( last_buf == NULL )
721 last_buf->next = buf;
724 in->start = new_stop;
730 *buf_out = x264_encode( w, in );