1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
/* Forward declarations of the three work-object entry points that are
   defined later in this file. */
13 int encx264Init( hb_work_object_t *, hb_job_t * );
14 int encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
15 void encx264Close( hb_work_object_t * );
/* Work object for this encoder. Only part of the initializer is visible in
   this listing; the string below is its descriptive label. */
17 hb_work_object_t hb_encx264 =
20 "H.264/AVC encoder (libx264)",
/* NOTE(review): DTS_BUFFER_SIZE is not referenced in the visible part of
   this file -- confirm whether it is still needed. */
26 #define DTS_BUFFER_SIZE 32
29 * The frame info struct remembers information about each frame across calls
30 * to x264_encoder_encode. Since frames are uniquely identified by their
31 * timestamp, we use some bits of the timestamp as an index. The LSB is
32 * chosen so that two successive frames will have different values in the
33 * bits over any plausible range of frame rates. (Starting with bit 8 allows
34 * any frame rate slower than 352fps.) The MSB determines the size of the array.
35 * It is chosen so that two frames can't use the same slot during the
36 * encoder's max frame delay (set by the standard as 16 frames) and so
37 * that, up to some minimum frame rate, frames are guaranteed to map to
38 * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
39 * no collisions down to a rate of .7 fps).
/* Bit positions within a 90000-ticks-per-second timestamp that bound the
   frame_info index. FRAME_INFO_MAX2 is the lowest bit used and sets the
   fastest supported frame rate; FRAME_INFO_MIN2 is the highest bit used and
   sets the slowest frame rate for which slots cannot collide. */
41 #define FRAME_INFO_MAX2 (8) // 2^8 = 256; 90000/256 = 352 frames/sec
42 #define FRAME_INFO_MIN2 (17) // 2^17 = 128K; 90000/131072 = 0.69 frames/sec
/* Slot count of frame_info[] (1024 with the values above) and the mask that
   reduces a shifted timestamp to a valid slot index. */
43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1))
44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
/* Per-instance encoder state. Only some of the members are visible in this
   listing. */
46 struct hb_work_private_s
/* Input picture passed to x264_encoder_encode on every call. */
50 x264_picture_t pic_in;
/* Value of pic_in.img.plane[0] right after x264_picture_alloc; saved in
   encx264Init and patched back in encx264Close before the picture is
   cleaned up. */
51 uint8_t *x264_allocated_pic;
53 int chap_mark; // saved chap mark when we're propagating it
54 int64_t last_stop; // Debugging - stop time of previous input frame
/* Per-frame data indexed by timestamp bits; written by save_frame_info and
   read by get_frame_duration. */
60 } frame_info[FRAME_INFO_SIZE];
65 /***********************************************************************
66 * encx264Init
67 ***********************************************************************
69 **********************************************************************/
/* Sets up the encoder for one job: allocates the private state, fills an
   x264 parameter block from the job (frame size and rate, keyframe interval,
   level, user-supplied x264opts, VUI colour description, sample aspect ratio
   and rate control), opens the encoder, stores the SPS and PPS in w->config,
   allocates the input picture and computes init_delay.

   NOTE(review): this listing is missing several lines of the function
   (braces, declarations and some conditions), and the token "¶m" looks like
   a mis-decoded "&param" -- confirm against the upstream source.
   NOTE(review): no NULL check on the calloc result is visible before pv is
   first dereferenced. */
70 int encx264Init( hb_work_object_t * w, hb_job_t * job )
77 hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
82 memset( pv->filename, 0, 1024 );
83 hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
85 x264_param_default( ¶m );
/* One and a half encoder threads per CPU reported by hb_get_cpu_count(). */
87 param.i_threads = ( hb_get_cpu_count() * 3 / 2 );
88 param.i_width = job->width;
89 param.i_height = job->height;
90 param.i_fps_num = job->vrate;
91 param.i_fps_den = job->vrate_base;
93 if (job->vrate_base != 1080000)
95 /* If the fps isn't 25, adjust the key intervals. Add 1 because
96 we want 24, not 23 with a truncated remainder. */
97 param.i_keyint_min = (job->vrate / job->vrate_base) + 1;
98 param.i_keyint_max = (10 * job->vrate / job->vrate_base) + 1;
99 hb_log("encx264: keyint-min: %i, keyint-max: %i", param.i_keyint_min, param.i_keyint_max);
102 param.i_log_level = X264_LOG_INFO;
103 if( job->h264_level )
106 param.i_level_idc = job->h264_level;
107 hb_log( "encx264: encoding at level %i",
112 This section passes the string x264opts to libx264 for parsing into
113 parameter names and values.
115 The string is set up like this:
116 option1=value1:option2=value 2
118 So, you have to iterate through based on the colons, and then put
119 the left side of the equals sign in "name" and the right side into
120 "value." Then you hand those strings off to x264 for interpretation.
122 This is all based on the universal x264 option handling Loren
123 Merritt implemented in the Mplayer/Mencoder project.
126 if( job->x264opts != NULL && *job->x264opts != '\0' )
128 char *x264opts, *x264opts_start;
130 x264opts = x264opts_start = strdup(job->x264opts);
132 while( x264opts_start && *x264opts )
134 char *name = x264opts;
138 x264opts += strcspn( x264opts, ":" );
145 value = strchr( name, '=' );
153 When B-frames are enabled, the max frame count increments
154 by 1 (regardless of the number of B-frames). If you don't
155 change the duration of the video track when you mux, libmp4
156 barfs. So, check if the x264opts are using B-frames, and
157 when they are, set the boolean job->areBframes as true.
160 if( !( strcmp( name, "bframes" ) ) )
162 if( atoi( value ) > 0 )
168 /* Note b-pyramid here, so the initial delay can be doubled */
169 if( !( strcmp( name, "b-pyramid" ) ) )
173 if( atoi( value ) > 0 )
184 /* Here's where the strings are passed to libx264 for parsing. */
185 ret = x264_param_parse( ¶m, name, value );
187 /* Let x264 sanity check the options for us*/
188 if( ret == X264_PARAM_BAD_NAME )
189 hb_log( "x264 options: Unknown suboption %s", name );
190 if( ret == X264_PARAM_BAD_VALUE )
191 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
193 free(x264opts_start);
196 /* set up the VUI color model & gamma to match what the COLR atom
197 * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
198 if( job->color_matrix == 1 )
200 // ITU BT.601 DVD or SD TV content
201 param.vui.i_colorprim = 6;
202 param.vui.i_transfer = 1;
203 param.vui.i_colmatrix = 6;
205 else if( job->color_matrix == 2 )
207 // ITU BT.709 HD content
208 param.vui.i_colorprim = 1;
209 param.vui.i_transfer = 1;
210 param.vui.i_colmatrix = 1;
212 else if ( job->title->width >= 1280 || job->title->height >= 720 )
214 // we guess that 720p or above is ITU BT.709 HD content
215 param.vui.i_colorprim = 1;
216 param.vui.i_transfer = 1;
217 param.vui.i_colmatrix = 1;
221 // ITU BT.601 DVD or SD TV content
222 param.vui.i_colorprim = 6;
223 param.vui.i_transfer = 1;
224 param.vui.i_colmatrix = 6;
227 if( job->anamorphic.mode )
229 param.vui.i_sar_width = job->anamorphic.par_width;
230 param.vui.i_sar_height = job->anamorphic.par_height;
232 hb_log( "encx264: encoding with stored aspect %d/%d",
233 param.vui.i_sar_width, param.vui.i_sar_height );
/* Rate control. A vquality strictly between 0 and 1 is treated as a
   fraction and mapped onto the 0..51 RF/QP scale. */
237 if( job->vquality > 0.0 && job->vquality < 1.0 )
243 param.rc.i_rc_method = X264_RC_CRF;
244 param.rc.f_rf_constant = 51 - job->vquality * 51;
245 hb_log( "encx264: Encoding at constant RF %f",
246 param.rc.f_rf_constant );
251 param.rc.i_rc_method = X264_RC_CQP;
252 param.rc.i_qp_constant = 51 - job->vquality * 51;
253 hb_log( "encx264: encoding at constant QP %d",
254 param.rc.i_qp_constant );
258 else if( job->vquality == 0 || job->vquality >= 1.0 )
260 /* Use the vquality as a raw RF or QP
261 instead of treating it like a percentage. */
266 param.rc.i_rc_method = X264_RC_CRF;
267 param.rc.f_rf_constant = job->vquality;
268 hb_log( "encx264: Encoding at constant RF %f",
269 param.rc.f_rf_constant );
274 param.rc.i_rc_method = X264_RC_CQP;
275 param.rc.i_qp_constant = job->vquality;
276 hb_log( "encx264: encoding at constant QP %d",
277 param.rc.i_qp_constant );
284 param.rc.i_rc_method = X264_RC_ABR;
285 param.rc.i_bitrate = job->vbitrate;
/* The stats file written and the stats file read are the same temporary
   file, pv->filename. */
289 param.rc.b_stat_write = 1;
290 param.rc.psz_stat_out = pv->filename;
293 param.rc.b_stat_read = 1;
294 param.rc.psz_stat_in = pv->filename;
299 hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
300 pv->x264 = x264_encoder_open( ¶m );
/* The code below takes nal[1] as the SPS and nal[2] as the PPS of the
   header NALs returned here. */
302 x264_encoder_headers( pv->x264, &nal, &nal_count );
304 /* Sequence Parameter Set */
305 x264_nal_encode( w->config->h264.sps, &nal_size, 0, &nal[1] );
306 w->config->h264.sps_length = nal_size;
308 /* Picture Parameter Set */
309 x264_nal_encode( w->config->h264.pps, &nal_size, 0, &nal[2] );
310 w->config->h264.pps_length = nal_size;
312 x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
313 job->width, job->height );
/* Both chroma strides are set to half the luma width, rounded up. The
   allocated plane[0] pointer is remembered so encx264Close can restore it. */
315 pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
316 pv->x264_allocated_pic = pv->pic_in.img.plane[0];
320 /* Basic initDelay value is the clockrate divided by the FPS
321 -- the length of one frame in clockticks. */
322 pv->init_delay = 90000. / ((double)job->vrate / (double)job->vrate_base);
324 /* 23.976-length frames are 3753.75 ticks long on average but the DVD
325 creates that average rate by repeating 59.94 fields so the max
326 frame size is actually 4504.5 (3 field times). The field durations
327 are computed based on quantized times (see below) so we need an extra
328 two ticks to account for the rounding. */
/* NOTE(review): this equality can only match if init_delay is an integer
   member that truncated 3753.75 to 3753 -- its declaration is not visible
   in this listing; confirm. */
329 if (pv->init_delay == 3753)
330 pv->init_delay = 4507;
332 /* frame rates are not exact in the DVD 90KHz PTS clock (they are
333 exact in the DVD 27MHz system clock but we never see that) so the
334 rates computed above are all +-1 due to quantization. Worst case
335 is when a clock-rounded-down frame is adjacent to a rounded-up frame
336 which makes one of the frames 2 ticks longer than the nominal
340 /* For VFR, libhb sees the FPS as 29.97, but the longest frames
341 will use the duration of frames running at 23.976fps instead.. */
344 pv->init_delay = 7506;
347 /* The delay is 1 frame for regular b-frames, 2 for b-pyramid. */
348 pv->init_delay *= job->areBframes;
350 w->config->h264.init_delay = pv->init_delay;
/* Tears down the encoder state reached through w->private_data: restores
   the x264-allocated plane[0] pointer, cleans up the input picture, closes
   the x264 encoder and clears w->private_data. */
355 void encx264Close( hb_work_object_t * w )
357 hb_work_private_t * pv = w->private_data;
359 * Patch the x264 allocated data back in so that x264 can free it
360 * we have been using our own buffers during the encode to avoid copying.
362 pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
363 x264_picture_clean( &pv->pic_in );
364 x264_encoder_close( pv->x264 );
366 w->private_data = NULL;
372 * see comments in definition of 'frame_info' in pv struct for description
373 * of what these routines are doing.
/* Stores the duration (stop - start) of input buffer `in` in the frame_info
   slot selected by bits FRAME_INFO_MAX2 and up of its start timestamp,
   masked with FRAME_INFO_MASK. */
375 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
377 int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
378 pv->frame_info[i].duration = in->stop - in->start;
/* Returns the duration held in the frame_info slot for timestamp `pts`,
   using the same shift-and-mask slot computation as save_frame_info. */
381 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
383 int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
384 return pv->frame_info[i].duration;
/* Packs the NAL units from one x264_encoder_encode call into a buffer
   obtained from hb_video_buffer_init. The buffer's start is the output
   picture's pts and its stop is that pts plus the duration saved for it;
   the frame type is derived from pic_out->i_type and the NAL's i_ref_idc.
   When no data was written (buf->size <= 0) the buffer is closed with
   hb_buffer_close.

   w       - work object; its private_data supplies the job and chapter state
   pic_out - picture description returned by the encoder
   i_nal   - number of entries in nal
   nal     - NAL units returned by the encoder

   NOTE(review): several lines of this function, including its return
   statement, are missing from this listing. */
387 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
388 int i_nal, x264_nal_t *nal )
390 hb_buffer_t *buf = NULL;
391 hb_work_private_t *pv = w->private_data;
392 hb_job_t *job = pv->job;
394 /* Should be way too large */
395 buf = hb_video_buffer_init( job->width, job->height );
399 // use the pts to get the original frame's duration.
400 int64_t duration = get_frame_duration( pv, pic_out->i_pts );
401 buf->start = pic_out->i_pts;
402 buf->stop = pic_out->i_pts + duration;
404 /* Encode all the NALs we were given into buf.
405 NOTE: This code assumes one video frame per NAL (but there can
406 be other stuff like SPS and/or PPS). If there are multiple
407 frames we only get the duration of the first which will
408 eventually screw up the muxer & decoder. */
410 for( i = 0; i < i_nal; i++ )
/* `data` is the space still free in buf; the encoded NAL is written at the
   current end of the buffer. */
412 int data = buf->alloc - buf->size;
413 int size = x264_nal_encode( buf->data + buf->size, &data, 1, &nal[i] );
419 if( job->mux & HB_MUX_AVI )
421 if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
423 buf->frametype = HB_FRAME_KEY;
429 /* H.264 in .mp4 or .mkv */
430 int naltype = buf->data[buf->size+4] & 0x1f;
431 if ( naltype == 0x7 || naltype == 0x8 )
433 // Sequence Parameter Set & Picture Parameter Set go in the
434 // mp4 header so skip them here
438 /* H.264 in mp4 (stolen from mp4creator) */
/* Overwrite the first four bytes of this NAL with (size - 4), most
   significant byte first. */
439 buf->data[buf->size+0] = ( ( size - 4 ) >> 24 ) & 0xFF;
440 buf->data[buf->size+1] = ( ( size - 4 ) >> 16 ) & 0xFF;
441 buf->data[buf->size+2] = ( ( size - 4 ) >> 8 ) & 0xFF;
442 buf->data[buf->size+3] = ( ( size - 4 ) >> 0 ) & 0xFF;
444 /* Decide what type of frame we have. */
445 switch( pic_out->i_type )
448 buf->frametype = HB_FRAME_IDR;
449 /* if we have a chapter marker pending and this
450 frame's presentation time stamp is at or after
451 the marker's time stamp, use this as the
453 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
456 buf->new_chap = pv->chap_mark;
461 buf->frametype = HB_FRAME_I;
465 buf->frametype = HB_FRAME_P;
469 buf->frametype = HB_FRAME_B;
472 /* This is for b-pyramid, which has reference b-frames
473 However, it doesn't seem to ever be used... */
475 buf->frametype = HB_FRAME_BREF;
478 // If it isn't the above, what type of frame is it??
484 /* Since libx264 doesn't tell us when b-frames are
485 themselves reference frames, figure it out on our own. */
486 if( (buf->frametype == HB_FRAME_B) &&
487 (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
488 buf->frametype = HB_FRAME_BREF;
492 // make sure we found at least one video frame
493 if ( buf->size <= 0 )
495 // no video - discard the buf
496 hb_buffer_close( &buf );
/* Encodes one input buffer. Points pv->pic_in at the planes inside in->data
   (no copy), requests an IDR frame when the buffer starts a chapter and
   chapter markers are enabled, records the frame's duration with
   save_frame_info, passes in->start to x264 as the pts, and returns
   whatever nal_encode builds from the encoder's output.

   NOTE(review): the continuity log below prints pv->last_stop (an int64_t)
   with %lld; PRId64 from <inttypes.h> would be the portable specifier.
   NOTE(review): several lines of this function (braces, declarations and
   some conditions) are missing from this listing. */
501 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
503 hb_work_private_t *pv = w->private_data;
504 hb_job_t *job = pv->job;
506 /* Point x264 at our current buffers Y(UV) data. */
507 pv->pic_in.img.plane[0] = in->data;
/* Size in bytes of one chroma plane: half width by half height, each
   rounded up. */
509 int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
512 /* XXX x264 has currently no option for grayscale encoding */
513 memset( pv->pic_in.img.plane[1], 0x80, uvsize );
514 memset( pv->pic_in.img.plane[2], 0x80, uvsize );
518 /* Point x264 at our buffers (Y)UV data */
519 pv->pic_in.img.plane[1] = in->data + job->width * job->height;
520 pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
522 if( in->new_chap && job->chapter_markers )
524 /* chapters have to start with an IDR frame so request that this
525 frame be coded as IDR. Since there may be up to 16 frames
526 currently buffered in the encoder remember the timestamp so
527 when this frame finally pops out of the encoder we'll mark
528 its buffer as the start of a chapter. */
529 pv->pic_in.i_type = X264_TYPE_IDR;
530 if( pv->next_chap == 0 )
532 pv->next_chap = in->start;
533 pv->chap_mark = in->new_chap;
535 /* don't let 'work_loop' put a chapter mark on the wrong buffer */
540 pv->pic_in.i_type = X264_TYPE_AUTO;
542 pv->pic_in.i_qpplus1 = 0;
544 /* XXX this is temporary debugging code to check that the upstream
545 * modules (render & sync) have generated a continuous, self-consistent
546 * frame stream with the current frame's start time equal to the
547 * previous frame's stop time.
549 if( pv->last_stop != in->start )
551 hb_log("encx264 input continuity err: last stop %lld start %lld",
552 pv->last_stop, in->start);
554 pv->last_stop = in->stop;
556 // Remember info about this frame that we need to pass across
557 // the x264_encoder_encode call (since it reorders frames).
558 save_frame_info( pv, in );
560 /* Feed the input PTS to x264 so it can figure out proper output PTS */
561 pv->pic_in.i_pts = in->start;
563 x264_picture_t pic_out;
567 x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
570 return nal_encode( w, &pic_out, i_nal, nal );
/* Work entry point. On end of input it drains the encoder by calling
   x264_encoder_encode with a NULL input picture and chains the resulting
   buffers through their next pointers. Otherwise it encodes *buf_in; a frame
   whose duration exceeds pv->init_delay (when init_delay is non-zero) is
   fed to the encoder several times with consecutive, shorter start/stop
   ranges. The ordinary single-frame result is stored through buf_out.

   NOTE(review): several lines of this function (braces, the end-of-input
   test, some assignments and the return statements) are missing from this
   listing. */
575 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
576 hb_buffer_t ** buf_out )
578 hb_work_private_t *pv = w->private_data;
579 hb_buffer_t *in = *buf_in;
585 // EOF on input. Flush any frames still in the encoder then
586 // send the eof downstream to tell the muxer we're done.
587 x264_picture_t pic_out;
590 hb_buffer_t *last_buf = NULL;
594 x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
598 hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
601 if ( last_buf == NULL )
604 last_buf->next = buf;
608 // Flushed everything - add the eof to the end of the chain.
609 if ( last_buf == NULL )
618 // Not EOF - encode the packet & wrap it in a NAL
620 // if we're re-ordering frames, check if this frame is too large to reorder
621 if ( pv->init_delay && in->stop - in->start > pv->init_delay )
623 // This frame's duration is larger than the time allotted for b-frame
624 // reordering. That means that if it's used as a reference the decoder
625 // won't be able to move it early enough to render it in correct
626 // sequence & the playback will have odd jumps & twitches. To make
627 // sure this doesn't happen we pretend this frame is multiple
628 // frames, each with duration <= init_delay. Since each of these
629 // new frames contains the same image the visual effect is identical
630 // to the original but the resulting stream can now be coded without
631 // error. We take advantage of the fact that x264 buffers frame
632 // data internally to feed the same image into the encoder multiple
633 // times, just changing its start & stop times each time.
634 int64_t orig_stop = in->stop;
635 int64_t new_stop = in->start;
636 hb_buffer_t *last_buf = NULL;
638 // We want to spread the new frames uniformly over the total time
639 // so that we don't end up with a very short frame at the end.
640 // In the number of pieces calculation we add in init_delay-1 to
641 // round up but not add an extra piece if the frame duration is
642 // a multiple of init_delay. The final increment of frame_dur is
643 // to restore the bits that got truncated by the divide on the
644 // previous line. If we don't do this we end up with an extra tiny
645 // frame at the end whose duration is npieces-1.
646 int64_t frame_dur = orig_stop - new_stop;
647 int64_t npieces = ( frame_dur + pv->init_delay - 1 ) / pv->init_delay;
648 frame_dur /= npieces;
// Each pass advances new_stop by one piece (clamped to orig_stop), encodes
// the same image again, then moves in->start up to new_stop.
651 while ( in->start < orig_stop )
653 new_stop += frame_dur;
654 if ( new_stop > orig_stop )
655 new_stop = orig_stop;
657 hb_buffer_t *buf = x264_encode( w, in );
660 if ( last_buf == NULL )
663 last_buf->next = buf;
666 in->start = new_stop;
671 *buf_out = x264_encode( w, in );