1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
/*
 * Forward declarations of the three encoder entry points that are defined
 * further down in this file (init, per-buffer work, close).
 */
13 int encx264Init( hb_work_object_t *, hb_job_t * );
14 int encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
15 void encx264Close( hb_work_object_t * );
/*
 * Work-object descriptor for this encoder.
 * NOTE(review): only one string literal of the initializer survives in this
 * excerpt; the braces and the other entries are on lines that are missing
 * here - confirm against the full file.
 */
17 hb_work_object_t hb_encx264 =
20 "H.264/AVC encoder (libx264)",
/* NOTE(review): DTS_BUFFER_SIZE is not referenced anywhere in the visible
 * part of this file - confirm whether it is still needed. */
26 #define DTS_BUFFER_SIZE 32
29 * The frame info struct remembers information about each frame across calls
30 * to x264_encoder_encode. Since frames are uniquely identified by their
31 * timestamp, we use some bits of the timestamp as an index. The LSB is
32 * chosen so that two successive frames will have different values in the
33 * bits over any plausible range of frame rates. (Starting with bit 8 allows
34 * any frame rate slower than 352fps.) The MSB determines the size of the array.
35 * It is chosen so that two frames can't use the same slot during the
36 * encoder's max frame delay (set by the standard as 16 frames) and so
37 * that, up to some minimum frame rate, frames are guaranteed to map to
38 * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
39 * no collisions down to a rate of .7 fps).
/*
 * Bit range of a frame's start time that is used to index frame_info[]:
 * FRAME_INFO_MAX2 is the lowest bit used (it bounds the highest frame rate
 * at which successive frames still land in different slots) and
 * FRAME_INFO_MIN2 is the highest bit used (it bounds the lowest such rate).
 * With the values below FRAME_INFO_SIZE is 1 << 10 = 1024 slots and
 * FRAME_INFO_MASK is 1023.
 */
41 #define FRAME_INFO_MAX2 (8) // 2^8 = 256; 90000/256 = 352 frames/sec
42 #define FRAME_INFO_MIN2 (17) // 2^17 = 128K; 90000/131072 = 0.69 frames/sec (1.46 sec/frame)
43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1))
44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
/*
 * Private per-encoder state, reached through w->private_data.
 * NOTE(review): several members that the functions in this file use (job,
 * x264, init_delay, next_chap, filename and the element type of frame_info)
 * are declared on lines that are missing from this excerpt.
 */
46 struct hb_work_private_s
50 x264_picture_t pic_in; // picture passed to x264_encoder_encode as input
51 uint8_t *x264_allocated_pic; // plane[0] as set up by x264_picture_alloc; restored in encx264Close
53 int chap_mark; // saved chap mark when we're propagating it
54 int64_t last_stop; // Debugging - stop time of previous input frame
60 } frame_info[FRAME_INFO_SIZE]; // per-frame info, indexed by bits of the frame start time
65 /***********************************************************************
66 * hb_work_encx264_init
67 ***********************************************************************
69 **********************************************************************/
/*
 * encx264Init - configure libx264 for a job and open the encoder.
 *
 *   w    work object; its config receives the SPS, PPS and init_delay
 *   job  job description the encoder settings are taken from
 *
 * NOTE(review): this excerpt is a lossy extraction: every line carries a
 * stray leading number and many lines (braces, declarations, some
 * conditions, else-branches, comment delimiters, the return) are missing,
 * so only the surviving statements are described here.
 *
 * What the visible statements do:
 *  - allocate the zeroed private state with calloc() and build the name of
 *    the "x264.log" stats file with hb_get_tempory_filename();
 *  - start from x264_param_default() and fill in thread count, frame size
 *    and frame rate from the job;
 *  - derive keyint-min / keyint-max from the integer frame rate;
 *  - split job->x264opts into name=value pairs separated by ':' and hand
 *    each pair to x264_param_parse(), logging unknown names and bad values;
 *    value is NULL for an option written without '=', so it is checked
 *    before being passed to atoi();
 *  - set the VUI colour description from job->color_matrix, falling back on
 *    the title dimensions, and the sample aspect ratio when
 *    job->anamorphic.mode is set;
 *  - select CRF, CQP or ABR rate control from job->vquality / job->vbitrate
 *    and wire up the stats file for reading or writing;
 *  - open the encoder, store the SPS and PPS in w->config->h264, allocate
 *    the input picture and compute init_delay.
 *
 * NOTE(review): no check of the calloc() or x264_encoder_open() results is
 * visible in this excerpt - confirm against the full file.
 */
70 int encx264Init( hb_work_object_t * w, hb_job_t * job )
77 hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
82 memset( pv->filename, 0, 1024 );
83 hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
85 x264_param_default( &param );
87 param.i_threads = ( hb_get_cpu_count() * 3 / 2 );
88 param.i_width = job->width;
89 param.i_height = job->height;
90 param.i_fps_num = job->vrate;
91 param.i_fps_den = job->vrate_base;
93 /* Set min:max key intervals ratio to 1:10 of fps.
94 * This section is skipped if fps=25 (default).
96 if (job->vrate_base != 1080000)
98 int fps = job->vrate / job->vrate_base;
100 /* adjust +1 when fps has remainder to bump { 23.976, 29.97, 59.94 } to { 24, 30, 60 } */
101 if (job->vrate % job->vrate_base)
104 param.i_keyint_min = fps;
105 param.i_keyint_max = fps * 10;
107 hb_log("encx264: keyint-min: %i, keyint-max: %i", param.i_keyint_min, param.i_keyint_max);
110 param.i_log_level = X264_LOG_INFO;
111 if( job->h264_level )
114 param.i_level_idc = job->h264_level;
115 hb_log( "encx264: encoding at level %i",
120 This section passes the string x264opts to libx264 for parsing into
121 parameter names and values.
123 The string is set up like this:
124 option1=value1:option2=value 2
126 So, you have to iterate through based on the colons, and then put
127 the left side of the equals sign in "name" and the right side into
128 "value." Then you hand those strings off to x264 for interpretation.
130 This is all based on the universal x264 option handling Loren
131 Merritt implemented in the Mplayer/Mencoder project.
134 if( job->x264opts != NULL && *job->x264opts != '\0' )
136 char *x264opts, *x264opts_start;
138 x264opts = x264opts_start = strdup(job->x264opts);
140 while( x264opts_start && *x264opts )
142 char *name = x264opts;
146 x264opts += strcspn( x264opts, ":" );
153 value = strchr( name, '=' );
161 When B-frames are enabled, the max frame count increments
162 by 1 (regardless of the number of B-frames). If you don't
163 change the duration of the video track when you mux, libmp4
164 barfs. So, check if the x264opts are using B-frames, and
165 when they are, set the boolean job->areBframes as true.
168 if( !( strcmp( name, "bframes" ) ) )
170 if( value != NULL && atoi( value ) > 0 )
176 /* Note b-pyramid here, so the initial delay can be doubled */
177 if( !( strcmp( name, "b-pyramid" ) ) )
181 if( value != NULL && atoi( value ) > 0 )
192 /* Here's where the strings are passed to libx264 for parsing. */
193 ret = x264_param_parse( &param, name, value );
195 /* Let x264 sanity check the options for us*/
196 if( ret == X264_PARAM_BAD_NAME )
197 hb_log( "x264 options: Unknown suboption %s", name );
198 if( ret == X264_PARAM_BAD_VALUE )
199 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
201 free(x264opts_start);
204 /* set up the VUI color model & gamma to match what the COLR atom
205 * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
206 if( job->color_matrix == 1 )
208 // ITU BT.601 DVD or SD TV content
209 param.vui.i_colorprim = 6;
210 param.vui.i_transfer = 1;
211 param.vui.i_colmatrix = 6;
213 else if( job->color_matrix == 2 )
215 // ITU BT.709 HD content
216 param.vui.i_colorprim = 1;
217 param.vui.i_transfer = 1;
218 param.vui.i_colmatrix = 1;
220 else if ( job->title->width >= 1280 || job->title->height >= 720 )
222 // we guess that 720p or above is ITU BT.709 HD content
223 param.vui.i_colorprim = 1;
224 param.vui.i_transfer = 1;
225 param.vui.i_colmatrix = 1;
229 // ITU BT.601 DVD or SD TV content
230 param.vui.i_colorprim = 6;
231 param.vui.i_transfer = 1;
232 param.vui.i_colmatrix = 6;
235 if( job->anamorphic.mode )
237 param.vui.i_sar_width = job->anamorphic.par_width;
238 param.vui.i_sar_height = job->anamorphic.par_height;
240 hb_log( "encx264: encoding with stored aspect %d/%d",
241 param.vui.i_sar_width, param.vui.i_sar_height );
245 if( job->vquality > 0.0 && job->vquality < 1.0 )
251 param.rc.i_rc_method = X264_RC_CRF;
252 param.rc.f_rf_constant = 51 - job->vquality * 51;
253 hb_log( "encx264: Encoding at constant RF %f",
254 param.rc.f_rf_constant );
259 param.rc.i_rc_method = X264_RC_CQP;
260 param.rc.i_qp_constant = 51 - job->vquality * 51;
261 hb_log( "encx264: encoding at constant QP %d",
262 param.rc.i_qp_constant );
266 else if( job->vquality == 0 || job->vquality >= 1.0 )
268 /* Use the vquality as a raw RF or QP
269 instead of treating it like a percentage. */
274 param.rc.i_rc_method = X264_RC_CRF;
275 param.rc.f_rf_constant = job->vquality;
276 hb_log( "encx264: Encoding at constant RF %f",
277 param.rc.f_rf_constant );
282 param.rc.i_rc_method = X264_RC_CQP;
283 param.rc.i_qp_constant = job->vquality;
284 hb_log( "encx264: encoding at constant QP %d",
285 param.rc.i_qp_constant );
292 param.rc.i_rc_method = X264_RC_ABR;
293 param.rc.i_bitrate = job->vbitrate;
297 param.rc.b_stat_write = 1;
298 param.rc.psz_stat_out = pv->filename;
301 param.rc.b_stat_read = 1;
302 param.rc.psz_stat_in = pv->filename;
307 hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
308 pv->x264 = x264_encoder_open( &param );
310 x264_encoder_headers( pv->x264, &nal, &nal_count );
312 /* Sequence Parameter Set */
313 x264_nal_encode( w->config->h264.sps, &nal_size, 0, &nal[1] );
314 w->config->h264.sps_length = nal_size;
316 /* Picture Parameter Set */
317 x264_nal_encode( w->config->h264.pps, &nal_size, 0, &nal[2] );
318 w->config->h264.pps_length = nal_size;
320 x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
321 job->width, job->height );
323 pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
324 pv->x264_allocated_pic = pv->pic_in.img.plane[0];
328 /* Basic initDelay value is the clockrate divided by the FPS
329 -- the length of one frame in clockticks. */
330 pv->init_delay = 90000. / ((double)job->vrate / (double)job->vrate_base);
332 /* 23.976-length frames are 3753.75 ticks long on average but the DVD
333 creates that average rate by repeating 59.94 fields so the max
334 frame size is actually 4504.5 (3 field times). The field durations
335 are computed based on quantized times (see below) so we need an extra
336 two ticks to account for the rounding. */
337 if (pv->init_delay == 3753)
338 pv->init_delay = 4507;
340 /* frame rates are not exact in the DVD 90KHz PTS clock (they are
341 exact in the DVD 27MHz system clock but we never see that) so the
342 rates computed above are all +-1 due to quantization. Worst case
343 is when a clock-rounded-down frame is adjacent to a rounded-up frame
344 which makes one of the frames 2 ticks longer than the nominal
348 /* For VFR, libhb sees the FPS as 29.97, but the longest frames
349 will use the duration of frames running at 23.976fps instead.. */
352 pv->init_delay = 7506;
355 /* The delay is 1 frame for regular b-frames, 2 for b-pyramid. */
356 pv->init_delay *= job->areBframes;
358 w->config->h264.init_delay = pv->init_delay;
363 void encx264Close( hb_work_object_t * w )
365 hb_work_private_t * pv = w->private_data;
367 * Patch the x264 allocated data back in so that x264 can free it
368 * we have been using our own buffers during the encode to avoid copying.
370 pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
371 x264_picture_clean( &pv->pic_in );
372 x264_encoder_close( pv->x264 );
374 w->private_data = NULL;
380 * see comments in definition of 'frame_info' in pv struct for description
381 * of what these routines are doing.
383 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
385 int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
386 pv->frame_info[i].duration = in->stop - in->start;
389 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
391 int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
392 return pv->frame_info[i].duration;
/*
 * nal_encode - pack the NAL units x264 produced for one picture into a
 * single hb_buffer_t.
 *
 *   w        work object; its private_data supplies the job and chapter state
 *   pic_out  picture description filled in by x264_encoder_encode
 *   i_nal    number of entries in nal[]
 *   nal      NAL units to encode
 *
 * The visible statements get a buffer from hb_video_buffer_init(), stamp it
 * with pic_out->i_pts and the duration saved for that timestamp, then call
 * x264_nal_encode() for each NAL, writing behind the data already in the
 * buffer.  For AVI output a NAL with NAL_PRIORITY_HIGHEST marks the buffer
 * as a key frame.  Otherwise NAL types 7 and 8 (SPS/PPS) are, per the
 * comment, skipped; the first four bytes of the encoded NAL are overwritten
 * with the big-endian value size - 4; and buf->frametype is derived from
 * pic_out->i_type, with a B frame whose NAL is not NAL_PRIORITY_DISPOSABLE
 * reported as HB_FRAME_BREF.  A buffer that ends up with size <= 0 is
 * closed.
 *
 * NOTE(review): this excerpt is lossy - braces, the declaration of i, the
 * case labels, the code that advances buf->size and the return statements
 * are on lines that are missing here; confirm details against the full file.
 */
395 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
396 int i_nal, x264_nal_t *nal )
398 hb_buffer_t *buf = NULL;
399 hb_work_private_t *pv = w->private_data;
400 hb_job_t *job = pv->job;
402 /* Should be way too large */
403 buf = hb_video_buffer_init( job->width, job->height );
407 // use the pts to get the original frame's duration.
408 int64_t duration = get_frame_duration( pv, pic_out->i_pts );
409 buf->start = pic_out->i_pts;
410 buf->stop = pic_out->i_pts + duration;
412 /* Encode all the NALs we were given into buf.
413 NOTE: This code assumes one video frame per NAL (but there can
414 be other stuff like SPS and/or PPS). If there are multiple
415 frames we only get the duration of the first which will
416 eventually screw up the muxer & decoder. */
418 for( i = 0; i < i_nal; i++ )
420 int data = buf->alloc - buf->size;
421 int size = x264_nal_encode( buf->data + buf->size, &data, 1, &nal[i] );
427 if( job->mux & HB_MUX_AVI )
429 if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
431 buf->frametype = HB_FRAME_KEY;
437 /* H.264 in .mp4 or .mkv */
438 int naltype = buf->data[buf->size+4] & 0x1f;
439 if ( naltype == 0x7 || naltype == 0x8 )
441 // Sequence Parameter Set & Picture Parameter Set go in the
442 // mp4 header so skip them here
446 /* H.264 in mp4 (stolen from mp4creator) */
447 buf->data[buf->size+0] = ( ( size - 4 ) >> 24 ) & 0xFF;
448 buf->data[buf->size+1] = ( ( size - 4 ) >> 16 ) & 0xFF;
449 buf->data[buf->size+2] = ( ( size - 4 ) >> 8 ) & 0xFF;
450 buf->data[buf->size+3] = ( ( size - 4 ) >> 0 ) & 0xFF;
452 /* Decide what type of frame we have. */
453 switch( pic_out->i_type )
456 buf->frametype = HB_FRAME_IDR;
457 /* if we have a chapter marker pending and this
458 frame's presentation time stamp is at or after
459 the marker's time stamp, use this as the
461 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
464 buf->new_chap = pv->chap_mark;
469 buf->frametype = HB_FRAME_I;
473 buf->frametype = HB_FRAME_P;
477 buf->frametype = HB_FRAME_B;
480 /* This is for b-pyramid, which has reference b-frames
481 However, it doesn't seem to ever be used... */
483 buf->frametype = HB_FRAME_BREF;
486 // If it isn't the above, what type of frame is it??
492 /* Since libx264 doesn't tell us when b-frames are
493 themselves reference frames, figure it out on our own. */
494 if( (buf->frametype == HB_FRAME_B) &&
495 (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
496 buf->frametype = HB_FRAME_BREF;
500 // make sure we found at least one video frame
501 if ( buf->size <= 0 )
503 // no video - discard the buf
504 hb_buffer_close( &buf );
/*
 * x264_encode - feed one input buffer to libx264 and return the result of
 * nal_encode() on the encoder's output.
 *
 *   w   work object; its private_data holds the encoder state
 *   in  input frame; in->data is used in place as the picture planes
 *
 * The visible statements point plane[0] at in->data and the chroma planes
 * at the data that follows job->width * job->height bytes (or fill the
 * chroma planes with 0x80 on the grayscale path), request an IDR picture
 * and remember the start time and chapter number when the buffer carries a
 * chapter mark and chapter markers are enabled, log a continuity error when
 * in->start differs from the previous frame's stop time, save the frame's
 * duration with save_frame_info(), pass in->start as the picture PTS and
 * call x264_encoder_encode().
 *
 * NOTE(review): this excerpt is lossy - braces, the condition selecting the
 * grayscale path, the else keywords and the declarations of nal / i_nal are
 * on lines that are missing here; confirm details against the full file.
 * NOTE(review): the continuity hb_log() uses %lld while last_stop is
 * declared int64_t; on platforms where int64_t is not long long the
 * arguments do not match the format - consider casting to long long.
 */
509 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
511 hb_work_private_t *pv = w->private_data;
512 hb_job_t *job = pv->job;
514 /* Point x264 at our current buffers Y(UV) data. */
515 pv->pic_in.img.plane[0] = in->data;
517 int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
520 /* XXX x264 has currently no option for grayscale encoding */
521 memset( pv->pic_in.img.plane[1], 0x80, uvsize );
522 memset( pv->pic_in.img.plane[2], 0x80, uvsize );
526 /* Point x264 at our buffers (Y)UV data */
527 pv->pic_in.img.plane[1] = in->data + job->width * job->height;
528 pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
530 if( in->new_chap && job->chapter_markers )
532 /* chapters have to start with an IDR frame so request that this
533 frame be coded as IDR. Since there may be up to 16 frames
534 currently buffered in the encoder remember the timestamp so
535 when this frame finally pops out of the encoder we'll mark
536 its buffer as the start of a chapter. */
537 pv->pic_in.i_type = X264_TYPE_IDR;
538 if( pv->next_chap == 0 )
540 pv->next_chap = in->start;
541 pv->chap_mark = in->new_chap;
543 /* don't let 'work_loop' put a chapter mark on the wrong buffer */
548 pv->pic_in.i_type = X264_TYPE_AUTO;
550 pv->pic_in.i_qpplus1 = 0;
552 /* XXX this is temporary debugging code to check that the upstream
553 * modules (render & sync) have generated a continuous, self-consistent
554 * frame stream with the current frame's start time equal to the
555 * previous frame's stop time.
557 if( pv->last_stop != in->start )
559 hb_log("encx264 input continuity err: last stop %lld start %lld",
560 pv->last_stop, in->start);
562 pv->last_stop = in->stop;
564 // Remember info about this frame that we need to pass across
565 // the x264_encoder_encode call (since it reorders frames).
566 save_frame_info( pv, in );
568 /* Feed the input PTS to x264 so it can figure out proper output PTS */
569 pv->pic_in.i_pts = in->start;
571 x264_picture_t pic_out;
575 x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
578 return nal_encode( w, &pic_out, i_nal, nal );
/*
 * encx264Work - per-buffer entry point of the encoder work object.
 *
 *   w        work object; its private_data holds the encoder state
 *   buf_in   *buf_in is the input frame to encode
 *   buf_out  receives the encoded output buffer (or chain of buffers)
 *
 * Three paths are visible:
 *  - end of input: x264_encoder_encode() is called with a NULL picture to
 *    flush delayed frames, each result goes through nal_encode() and the
 *    buffers are chained through ->next;
 *  - a frame whose duration exceeds pv->init_delay (when init_delay is
 *    non-zero): the duration is divided into npieces equal pieces and the
 *    same buffer is encoded once per piece, with in->start advanced to
 *    new_stop after each call, the outputs again chained through ->next;
 *  - otherwise: *buf_out is the result of a single x264_encode() call.
 *
 * NOTE(review): this excerpt is lossy - braces, the end-of-input test, the
 * flush loop header, the assignments that build the output chain and the
 * return statements are on lines that are missing here; confirm details
 * against the full file.
 */
583 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
584 hb_buffer_t ** buf_out )
586 hb_work_private_t *pv = w->private_data;
587 hb_buffer_t *in = *buf_in;
593 // EOF on input. Flush any frames still in the encoder then
594 // send the eof downstream to tell the muxer we're done.
595 x264_picture_t pic_out;
598 hb_buffer_t *last_buf = NULL;
602 x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
606 hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
609 if ( last_buf == NULL )
612 last_buf->next = buf;
616 // Flushed everything - add the eof to the end of the chain.
617 if ( last_buf == NULL )
626 // Not EOF - encode the packet & wrap it in a NAL
628 // if we're re-ordering frames, check if this frame is too large to reorder
629 if ( pv->init_delay && in->stop - in->start > pv->init_delay )
631 // This frame's duration is larger than the time allotted for b-frame
632 // reordering. That means that if it's used as a reference the decoder
633 // won't be able to move it early enough to render it in correct
634 // sequence & the playback will have odd jumps & twitches. To make
635 // sure this doesn't happen we pretend this frame is multiple
636 // frames, each with duration <= init_delay. Since each of these
637 // new frames contains the same image the visual effect is identical
638 // to the original but the resulting stream can now be coded without
639 // error. We take advantage of the fact that x264 buffers frame
640 // data internally to feed the same image into the encoder multiple
641 // times, just changing its start & stop times each time.
642 int64_t orig_stop = in->stop;
643 int64_t new_stop = in->start;
644 hb_buffer_t *last_buf = NULL;
646 // We want to spread the new frames uniformly over the total time
647 // so that we don't end up with a very short frame at the end.
648 // In the number of pieces calculation we add in init_delay-1 to
649 // round up but not add an extra piece if the frame duration is
650 // a multiple of init_delay. The final increment of frame_dur is
651 // to restore the bits that got truncated by the divide on the
652 // previous line. If we don't do this we end up with an extra tiny
653 // frame at the end whose duration is npieces-1.
654 int64_t frame_dur = orig_stop - new_stop;
655 int64_t npieces = ( frame_dur + pv->init_delay - 1 ) / pv->init_delay;
656 frame_dur /= npieces;
659 while ( in->start < orig_stop )
661 new_stop += frame_dur;
662 if ( new_stop > orig_stop )
663 new_stop = orig_stop;
665 hb_buffer_t *buf = x264_encode( w, in );
668 if ( last_buf == NULL )
671 last_buf->next = buf;
674 in->start = new_stop;
679 *buf_out = x264_encode( w, in );