1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
// Entry points of the x264 encoder work object; all three are defined
// further down in this file.
13 int encx264Init( hb_work_object_t *, hb_job_t * );
14 int encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
15 void encx264Close( hb_work_object_t * );
// Work-object descriptor for this encoder. The string is presumably its
// human-readable name -- confirm against the hb_work_object_t declaration.
17 hb_work_object_t hb_encx264 =
20 "H.264/AVC encoder (libx264)",
// NOTE(review): DTS_BUFFER_SIZE is not referenced by any of the code that
// follows in this file section -- confirm it is still needed.
26 #define DTS_BUFFER_SIZE 32
29 * The frame info struct remembers information about each frame across calls
30 * to x264_encoder_encode. Since frames are uniquely identified by their
31 * timestamp, we use some bits of the timestamp as an index. The LSB is
32 * chosen so that two successive frames will have different values in the
33 * bits over any plausible range of frame rates. (Starting with bit 8 allows
34 * any frame rate slower than 352fps.) The MSB determines the size of the array.
35 * It is chosen so that two frames can't use the same slot during the
36 * encoder's max frame delay (set by the standard as 16 frames) and so
37 * that, up to some minimum frame rate, frames are guaranteed to map to
38 * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
39 * no collisions down to a rate of .7 fps).
// Bit range of the 90 kHz timestamp that is used as the frame_info[] index.
// FRAME_INFO_MAX2 is the lowest bit used (it bounds the maximum frame rate),
// FRAME_INFO_MIN2 is the highest bit used (it bounds the minimum frame rate).
41 #define FRAME_INFO_MAX2 (8) // 2^8 = 256; 90000/256 = 352 frames/sec
42 #define FRAME_INFO_MIN2 (17) // 2^17 = 128K; 90000/131072 = ~0.7 frames/sec
// Number of slots covered by bits MAX2..MIN2 inclusive (1024 for 8..17).
43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1))
// Reduces a shifted timestamp to a slot index; works because the size is a
// power of two.
44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
// Per-instance state of the x264 encoder, reached through w->private_data.
46 struct hb_work_private_s
// Input picture passed to x264_encoder_encode; its plane pointers are
// redirected at each input buffer's data instead of copying the pixels.
50 x264_picture_t pic_in;
// plane[0] as allocated by x264_picture_alloc, saved so it can be restored
// before x264_picture_clean is called.
51 uint8_t *x264_allocated_pic;
55 uint32_t frames_split; // number of frames we had to split
56 int chap_mark; // saved chap mark when we're propagating it
57 int64_t last_stop; // Debugging - stop time of previous input frame
// Per-frame data (the frame duration) indexed by timestamp bits; written by
// save_frame_info and read by get_frame_duration.
63 } frame_info[FRAME_INFO_SIZE];
68 /***********************************************************************
69 * encx264Init
70 ***********************************************************************
72 **********************************************************************/
// Create and configure the libx264 encoder for one job.
//
// w:   work object; its config->h264 fields receive the SPS, PPS and the
//      initial delay.
// job: supplies dimensions, frame rate, pass, level, x264opts, colour
//      matrix, anamorphic settings, quality and bitrate.
//
// The function fills an x264 parameter set from the job, lets libx264 parse
// the user's x264opts string, opens the encoder, stores the header NALs,
// allocates the input picture and computes the B-frame initial delay.
73 int encx264Init( hb_work_object_t * w, hb_job_t * job )
// NOTE(review): no NULL check of the calloc result is visible before pv is
// dereferenced -- confirm.
80 hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
// pv->filename holds the path of the x264 stats file; it is used below as
// both psz_stat_out and psz_stat_in.
// NOTE(review): 1024 is presumably the declared size of pv->filename -- confirm.
85 memset( pv->filename, 0, 1024 );
86 hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
// NOTE(review): the argument spelled "¶m" here and in the later x264 calls
// looks like a mis-encoded "&param" -- confirm against the upstream file.
88 x264_param_default( ¶m );
// One and a half threads per CPU reported by hb_get_cpu_count (integer math).
90 param.i_threads = ( hb_get_cpu_count() * 3 / 2 );
91 param.i_width = job->width;
92 param.i_height = job->height;
93 param.i_fps_num = job->vrate;
94 param.i_fps_den = job->vrate_base;
// NOTE(review): a vrate_base of 1080000 means 25 fps only when vrate is
// 27000000; the test below does not look at vrate -- confirm.
96 /* Set min:max key intervals ratio to 1:10 of fps.
97 * This section is skipped if fps=25 (default).
99 if (job->vrate_base != 1080000)
101 if (job->pass == 2 && !job->cfr )
103 /* Even though the framerate might be different due to VFR,
104 we still want the same keyframe intervals as the 1st pass,
105 so the 1st pass stats won't conflict on frame decisions. */
// Intervals are computed from the frame rate kept in the interjob data,
// using integer division and then adding one.
106 hb_interjob_t * interjob = hb_interjob_get( job->h );
107 param.i_keyint_min = ( interjob->vrate / interjob->vrate_base ) + 1;
108 param.i_keyint_max = ( 10 * interjob->vrate / interjob->vrate_base ) + 1;
// Integer-truncated frame rate of this job.
112 int fps = job->vrate / job->vrate_base;
114 /* adjust +1 when fps has remainder to bump
115 { 23.976, 29.97, 59.94 } to { 24, 30, 60 } */
116 if (job->vrate % job->vrate_base)
// keyint-min is one second worth of frames, keyint-max ten seconds.
119 param.i_keyint_min = fps;
120 param.i_keyint_max = fps * 10;
123 hb_log("encx264: keyint-min: %i, keyint-max: %i", param.i_keyint_min, param.i_keyint_max);
126 param.i_log_level = X264_LOG_INFO;
// Apply the H.264 level requested by the job, if one was given.
127 if( job->h264_level )
130 param.i_level_idc = job->h264_level;
131 hb_log( "encx264: encoding at level %i",
136 This section passes the string x264opts to libx264 for parsing into
137 parameter names and values.
139 The string is set up like this:
140 option1=value1:option2=value 2
142 So, you have to iterate through based on the colons, and then put
143 the left side of the equals sign in "name" and the right side into
144 "value." Then you hand those strings off to x264 for interpretation.
146 This is all based on the universal x264 option handling Loren
147 Merritt implemented in the Mplayer/Mencoder project.
// Only parse when the job carries a non-empty option string.
150 if( job->x264opts != NULL && *job->x264opts != '\0' )
152 char *x264opts, *x264opts_start;
// Parse a heap copy; x264opts advances through it while x264opts_start keeps
// the pointer that is passed to free() afterwards.
154 x264opts = x264opts_start = strdup(job->x264opts);
// The x264opts_start test skips the loop entirely if strdup failed.
156 while( x264opts_start && *x264opts )
158 char *name = x264opts;
// Move to the next ':' separator, or to the terminating NUL.
162 x264opts += strcspn( x264opts, ":" );
// strchr yields NULL when the option has no '='.
// NOTE(review): the "Bad argument" log further down allows for a NULL value;
// confirm the atoi( value ) calls below are never reached with NULL.
169 value = strchr( name, '=' );
177 When B-frames are enabled, the max frame count increments
178 by 1 (regardless of the number of B-frames). If you don't
179 change the duration of the video track when you mux, libmp4
180 barfs. So, check if the x264opts are using B-frames, and
181 when they are, set the boolean job->areBframes as true.
184 if( !( strcmp( name, "bframes" ) ) )
186 if( atoi( value ) > 0 )
192 /* Note b-pyramid here, so the initial delay can be doubled */
193 if( !( strcmp( name, "b-pyramid" ) ) )
197 if( atoi( value ) > 0 )
208 /* Here's where the strings are passed to libx264 for parsing. */
209 ret = x264_param_parse( ¶m, name, value );
211 /* Let x264 sanity check the options for us*/
212 if( ret == X264_PARAM_BAD_NAME )
213 hb_log( "x264 options: Unknown suboption %s", name );
214 if( ret == X264_PARAM_BAD_VALUE )
215 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
// Release the working copy of the option string.
217 free(x264opts_start);
220 /* set up the VUI color model & gamma to match what the COLR atom
221 * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
222 if( job->color_matrix == 1 )
224 // ITU BT.601 DVD or SD TV content
225 param.vui.i_colorprim = 6;
226 param.vui.i_transfer = 1;
227 param.vui.i_colmatrix = 6;
229 else if( job->color_matrix == 2 )
231 // ITU BT.709 HD content
232 param.vui.i_colorprim = 1;
233 param.vui.i_transfer = 1;
234 param.vui.i_colmatrix = 1;
236 else if ( job->title->width >= 1280 || job->title->height >= 720 )
238 // we guess that 720p or above is ITU BT.709 HD content
239 param.vui.i_colorprim = 1;
240 param.vui.i_transfer = 1;
241 param.vui.i_colmatrix = 1;
245 // ITU BT.601 DVD or SD TV content
246 param.vui.i_colorprim = 6;
247 param.vui.i_transfer = 1;
248 param.vui.i_colmatrix = 6;
// With anamorphic encoding the pixel aspect ratio is stored in the VUI.
251 if( job->anamorphic.mode )
253 param.vui.i_sar_width = job->anamorphic.par_width;
254 param.vui.i_sar_height = job->anamorphic.par_height;
256 hb_log( "encx264: encoding with stored aspect %d/%d",
257 param.vui.i_sar_width, param.vui.i_sar_height );
// A vquality strictly between 0 and 1 is treated as a fraction and mapped
// onto the RF/QP scale as 51 - vquality * 51.
261 if( job->vquality > 0.0 && job->vquality < 1.0 )
267 param.rc.i_rc_method = X264_RC_CRF;
268 param.rc.f_rf_constant = 51 - job->vquality * 51;
269 hb_log( "encx264: Encoding at constant RF %f",
270 param.rc.f_rf_constant );
275 param.rc.i_rc_method = X264_RC_CQP;
276 param.rc.i_qp_constant = 51 - job->vquality * 51;
277 hb_log( "encx264: encoding at constant QP %d",
278 param.rc.i_qp_constant );
282 else if( job->vquality == 0 || job->vquality >= 1.0 )
284 /* Use the vquality as a raw RF or QP
285 instead of treating it like a percentage. */
290 param.rc.i_rc_method = X264_RC_CRF;
291 param.rc.f_rf_constant = job->vquality;
292 hb_log( "encx264: Encoding at constant RF %f",
293 param.rc.f_rf_constant );
298 param.rc.i_rc_method = X264_RC_CQP;
299 param.rc.i_qp_constant = job->vquality;
300 hb_log( "encx264: encoding at constant QP %d",
301 param.rc.i_qp_constant );
// Average-bitrate mode; presumably the fallback when vquality is negative,
// since both tests above exclude only that range -- confirm.
308 param.rc.i_rc_method = X264_RC_ABR;
309 param.rc.i_bitrate = job->vbitrate;
// The same stats file is used for writing and for reading.
313 param.rc.b_stat_write = 1;
314 param.rc.psz_stat_out = pv->filename;
317 param.rc.b_stat_read = 1;
318 param.rc.psz_stat_in = pv->filename;
323 hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
// NOTE(review): no check of the x264_encoder_open result is visible before
// pv->x264 is used -- confirm.
324 pv->x264 = x264_encoder_open( ¶m );
// Fetch the stream headers; nal[1] is taken as the SPS and nal[2] as the PPS.
326 x264_encoder_headers( pv->x264, &nal, &nal_count );
328 /* Sequence Parameter Set */
329 x264_nal_encode( w->config->h264.sps, &nal_size, 0, &nal[1] );
330 w->config->h264.sps_length = nal_size;
332 /* Picture Parameter Set */
333 x264_nal_encode( w->config->h264.pps, &nal_size, 0, &nal[2] );
334 w->config->h264.pps_length = nal_size;
// Allocate the input picture, set both chroma strides to half the width
// (rounded up) and remember plane[0] so encx264Close can give it back to x264.
336 x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
337 job->width, job->height );
339 pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
340 pv->x264_allocated_pic = pv->pic_in.img.plane[0];
344 /* Basic initDelay value is the clockrate divided by the FPS
345 -- the length of one frame in clockticks. */
346 pv->init_delay = 90000. / ((double)job->vrate / (double)job->vrate_base);
348 /* 23.976-length frames are 3753.75 ticks long on average but the DVD
349 creates that average rate by repeating 59.94 fields so the max
350 frame size is actually 4504.5 (3 field times). The field durations
351 are computed based on quantized times (see below) so we need an extra
352 two ticks to account for the rounding. */
// NOTE(review): the equality test relies on pv->init_delay being an integer
// type so that 3753.75 was truncated to 3753 -- confirm its declaration.
353 if (pv->init_delay == 3753)
354 pv->init_delay = 4507;
356 /* frame rates are not exact in the DVD 90KHz PTS clock (they are
357 exact in the DVD 27MHz system clock but we never see that) so the
358 rates computed above are all +-1 due to quantization. Worst case
359 is when a clock-rounded-down frame is adjacent to a rounded-up frame
360 which makes one of the frames 2 ticks longer than the nominal
364 /* For VFR, libhb sees the FPS as 29.97, but the longest frames
365 will use the duration of frames running at 23.976fps instead.
366 Since detelecine occasionally makes mistakes and since we have
367 to deal with some really horrible timing jitter from mkvs and
368 mp4s encoded with low resolution clocks, make the delay very
369 conservative if we're not doing CFR. */
375 /* The delay is 1 frame for regular b-frames, 2 for b-pyramid. */
// job->areBframes is used as a multiplier, so a value of 0 yields no delay.
376 pv->init_delay *= job->areBframes;
378 w->config->h264.init_delay = pv->init_delay;
// Tear down the encoder instance stored in w->private_data.
//
// Logs split-frame statistics when any frame was split, restores the plane
// pointer that x264 allocated so x264_picture_clean can free it, closes the
// x264 encoder and clears w->private_data.
383 void encx264Close( hb_work_object_t * w )
385 hb_work_private_t * pv = w->private_data;
// Only report when at least one frame had to be split.
387 if ( pv->frames_split )
389 hb_log( "encx264: %u frames had to be split (%u in, %u out)",
390 pv->frames_split, pv->frames_in, pv->frames_out );
393 * Patch the x264 allocated data back in so that x264 can free it
394 * we have been using our own buffers during the encode to avoid copying.
396 pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
397 x264_picture_clean( &pv->pic_in );
398 x264_encoder_close( pv->x264 );
// NOTE(review): no free( pv ) is visible before the pointer is dropped --
// confirm the private data is released.
400 w->private_data = NULL;
406 * see comments in definition of 'frame_info' in pv struct for description
407 * of what these routines are doing.
// Remember the duration (stop - start) of an input frame in the frame_info
// slot selected by its start timestamp, so it can be looked up again once
// the frame comes out of the encoder.
409 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
// Slot index: drop the low FRAME_INFO_MAX2 bits of the timestamp, then keep
// the bits selected by FRAME_INFO_MASK.
411 int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
412 pv->frame_info[i].duration = in->stop - in->start;
// Return the duration stored in the frame_info slot that the timestamp pts
// maps to. The value is whatever was written to that slot last, so it is
// only correct if no other frame mapped to the same slot in the meantime.
415 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
// Same index computation as in save_frame_info.
417 int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
418 return pv->frame_info[i].duration;
// Pack the NAL units produced by one x264_encoder_encode call into a newly
// allocated hb_buffer_t.
//
// w:       work object whose private data holds the job and frame_info table.
// pic_out: picture description from x264; i_pts and i_type are read.
// i_nal:   number of entries in nal.
// nal:     NAL units to encode.
//
// The buffer's start/stop come from pic_out->i_pts plus the duration saved
// for that timestamp. Frame type and flags are derived from pic_out->i_type
// and nal[i].i_ref_idc. If nothing was accumulated the buffer is closed.
// NOTE(review): presumably returns buf -- confirm.
421 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
422 int i_nal, x264_nal_t *nal )
424 hb_buffer_t *buf = NULL;
425 hb_work_private_t *pv = w->private_data;
426 hb_job_t *job = pv->job;
428 /* Should be way too large */
429 buf = hb_video_buffer_init( job->width, job->height );
433 // use the pts to get the original frame's duration.
434 int64_t duration = get_frame_duration( pv, pic_out->i_pts );
435 buf->start = pic_out->i_pts;
436 buf->stop = pic_out->i_pts + duration;
438 /* Encode all the NALs we were given into buf.
439 NOTE: This code assumes one video frame per NAL (but there can
440 be other stuff like SPS and/or PPS). If there are multiple
441 frames we only get the duration of the first which will
442 eventually screw up the muxer & decoder. */
444 for( i = 0; i < i_nal; i++ )
// The space still free in buf is passed to x264_nal_encode, which writes
// the NAL at the current end of the buffer.
446 int data = buf->alloc - buf->size;
447 int size = x264_nal_encode( buf->data + buf->size, &data, 1, &nal[i] );
// AVI muxing: a NAL with the highest reference priority marks a key frame.
453 if( job->mux & HB_MUX_AVI )
455 if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
457 buf->frametype = HB_FRAME_KEY;
463 /* H.264 in .mp4 or .mkv */
464 switch( nal[i].i_type )
466 /* Sequence Parameter Set & Picture Parameter Set go in the
467 * mp4 header so skip them here
480 /* H.264 in mp4 (stolen from mp4creator) */
// Store size - 4 as a 4-byte big-endian value in the first four bytes of
// the NAL that was just encoded at buf->data + buf->size.
481 buf->data[buf->size+0] = ( ( size - 4 ) >> 24 ) & 0xFF;
482 buf->data[buf->size+1] = ( ( size - 4 ) >> 16 ) & 0xFF;
483 buf->data[buf->size+2] = ( ( size - 4 ) >> 8 ) & 0xFF;
484 buf->data[buf->size+3] = ( ( size - 4 ) >> 0 ) & 0xFF;
486 /* Decide what type of frame we have. */
487 switch( pic_out->i_type )
490 buf->frametype = HB_FRAME_IDR;
491 /* if we have a chapter marker pending and this
492 frame's presentation time stamp is at or after
493 the marker's time stamp, use this as the
495 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
498 buf->new_chap = pv->chap_mark;
503 buf->frametype = HB_FRAME_I;
507 buf->frametype = HB_FRAME_P;
511 buf->frametype = HB_FRAME_B;
514 /* This is for b-pyramid, which has reference b-frames
515 However, it doesn't seem to ever be used... */
517 buf->frametype = HB_FRAME_BREF;
520 // If it isn't the above, what type of frame is it??
526 /* Since libx264 doesn't tell us when b-frames are
527 themselves reference frames, figure it out on our own. */
528 if( (buf->frametype == HB_FRAME_B) &&
529 (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
530 buf->frametype = HB_FRAME_BREF;
532 /* Expose disposable bit to muxer. */
533 if( nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE )
534 buf->flags &= ~HB_FRAME_REF;
536 buf->flags |= HB_FRAME_REF;
540 // make sure we found at least one video frame
541 if ( buf->size <= 0 )
543 // no video - discard the buf
544 hb_buffer_close( &buf );
// Feed one input buffer to x264 and return the result of nal_encode for
// whatever NALs the encoder produced.
//
// The picture planes are pointed at the input buffer's data rather than
// copied. When the buffer starts a chapter and chapter markers are enabled,
// an IDR frame is requested and the chapter is remembered until that frame
// leaves the encoder. The frame's duration is saved for later lookup and
// in->start is passed to x264 as the PTS.
549 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
551 hb_work_private_t *pv = w->private_data;
552 hb_job_t *job = pv->job;
554 /* Point x264 at our current buffers Y(UV) data. */
555 pv->pic_in.img.plane[0] = in->data;
// Size in bytes of one chroma plane: half the width times half the height,
// both rounded up.
557 int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
560 /* XXX x264 has currently no option for grayscale encoding */
561 memset( pv->pic_in.img.plane[1], 0x80, uvsize );
562 memset( pv->pic_in.img.plane[2], 0x80, uvsize );
566 /* Point x264 at our buffers (Y)UV data */
567 pv->pic_in.img.plane[1] = in->data + job->width * job->height;
568 pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
570 if( in->new_chap && job->chapter_markers )
572 /* chapters have to start with an IDR frame so request that this
573 frame be coded as IDR. Since there may be up to 16 frames
574 currently buffered in the encoder remember the timestamp so
575 when this frame finally pops out of the encoder we'll mark
576 its buffer as the start of a chapter. */
577 pv->pic_in.i_type = X264_TYPE_IDR;
// Only one pending chapter is tracked; next_chap == 0 means none is pending.
// NOTE(review): a chapter whose frame starts at timestamp 0 cannot be told
// apart from "none pending" -- confirm this cannot happen.
578 if( pv->next_chap == 0 )
580 pv->next_chap = in->start;
581 pv->chap_mark = in->new_chap;
583 /* don't let 'work_loop' put a chapter mark on the wrong buffer */
588 pv->pic_in.i_type = X264_TYPE_AUTO;
590 pv->pic_in.i_qpplus1 = 0;
592 /* XXX this is temporary debugging code to check that the upstream
593 * modules (render & sync) have generated a continuous, self-consistent
594 * frame stream with the current frame's start time equal to the
595 * previous frame's stop time.
597 if( pv->last_stop != in->start )
599 hb_log("encx264 input continuity err: last stop %"PRId64" start %"PRId64,
600 pv->last_stop, in->start);
602 pv->last_stop = in->stop;
604 // Remember info about this frame that we need to pass across
605 // the x264_encoder_encode call (since it reorders frames).
606 save_frame_info( pv, in );
608 /* Feed the input PTS to x264 so it can figure out proper output PTS */
609 pv->pic_in.i_pts = in->start;
611 x264_picture_t pic_out;
// The return value of x264_encoder_encode is discarded here.
615 x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
618 return nal_encode( w, &pic_out, i_nal, nal );
// Work entry point: encode the buffer in *buf_in and deliver the output
// through *buf_out.
//
// End of input: the encoder is drained by calling x264_encoder_encode with a
// NULL input picture, and the resulting buffers are chained through ->next.
// Normal input: the frame is encoded once, unless B-frame reordering is
// active (init_delay != 0) and the frame lasts longer than init_delay. In
// that case the same image is encoded several times as shorter pieces whose
// output buffers are chained.
623 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
624 hb_buffer_t ** buf_out )
626 hb_work_private_t *pv = w->private_data;
627 hb_buffer_t *in = *buf_in;
633 // EOF on input. Flush any frames still in the encoder then
634 // send the eof downstream to tell the muxer we're done.
635 x264_picture_t pic_out;
// Tail of the output chain built while flushing.
638 hb_buffer_t *last_buf = NULL;
// A NULL input picture asks x264 for the frames it is still holding; the
// return value of the call is discarded.
642 x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
646 hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
650 if ( last_buf == NULL )
653 last_buf->next = buf;
657 // Flushed everything - add the eof to the end of the chain.
658 if ( last_buf == NULL )
667 // Not EOF - encode the packet & wrap it in a NAL
670 // if we're re-ordering frames, check if this frame is too large to reorder
671 if ( pv->init_delay && in->stop - in->start > pv->init_delay )
673 // This frame's duration is larger than the time allotted for b-frame
674 // reordering. That means that if it's used as a reference the decoder
675 // won't be able to move it early enough to render it in correct
676 // sequence & the playback will have odd jumps & twitches. To make
677 // sure this doesn't happen we pretend this frame is multiple
678 // frames, each with duration <= init_delay. Since each of these
679 // new frames contains the same image the visual effect is identical
680 // to the original but the resulting stream can now be coded without
681 // error. We take advantage of the fact that x264 buffers frame
682 // data internally to feed the same image into the encoder multiple
683 // times, just changing its start & stop times each time.
685 int64_t orig_stop = in->stop;
686 int64_t new_stop = in->start;
687 hb_buffer_t *last_buf = NULL;
689 // We want to spread the new frames uniformly over the total time
690 // so that we don't end up with a very short frame at the end.
691 // In the number of pieces calculation we add in init_delay-1 to
692 // round up but not add an extra piece if the frame duration is
693 // a multiple of init_delay. The final increment of frame_dur is
694 // to restore the bits that got truncated by the divide on the
695 // previous line. If we don't do this we end up with an extra tiny
696 // frame at the end whose duration is npieces-1.
697 int64_t frame_dur = orig_stop - new_stop;
698 int64_t npieces = ( frame_dur + pv->init_delay - 1 ) / pv->init_delay;
699 frame_dur /= npieces;
// Encode piece after piece until the start time reaches the original stop.
702 while ( in->start < orig_stop )
704 new_stop += frame_dur;
// Clamp so the last piece never extends past the original frame.
705 if ( new_stop > orig_stop )
706 new_stop = orig_stop;
708 hb_buffer_t *buf = x264_encode( w, in );
712 if ( last_buf == NULL )
715 last_buf->next = buf;
// The next piece begins where this one ended.
718 in->start = new_stop;
// Frame is short enough (or reordering is off): encode it in one go.
724 *buf_out = x264_encode( w, in );